linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Andrey Ryabinin <arbn@yandex-team.com>
To: linux-kernel@vger.kernel.org
Cc: Alexander Graf <graf@amazon.com>,
	James Gowans <jgowans@amazon.com>,
	Mike Rapoport <rppt@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-mm@kvack.org, Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	x86@kernel.org, "H . Peter Anvin" <hpa@zytor.com>,
	Eric Biederman <ebiederm@xmission.com>,
	kexec@lists.infradead.org, Pratyush Yadav <ptyadav@amazon.de>,
	Jason Gunthorpe <jgg@nvidia.com>,
	Pasha Tatashin <pasha.tatashin@soleen.com>,
	David Rientjes <rientjes@google.com>,
	Andrey Ryabinin <arbn@yandex-team.com>
Subject: [PATCH v2 4/7] kexec, kstate: delay loading of kexec segments
Date: Mon, 10 Mar 2025 13:03:15 +0100	[thread overview]
Message-ID: <20250310120318.2124-5-arbn@yandex-team.com> (raw)
In-Reply-To: <20250310120318.2124-1-arbn@yandex-team.com>

KSTATE's purpose is to preserve some memory across kexec. To make this
happen, kexec needs to choose its destination ranges after KSTATE knows
which memory it preserves, so that these ranges don't collide with the
KSTATE-preserved memory.

Kexec chooses destination ranges at the kexec load stage, which might
happen long before the actual reboot to the new kernel. This means that
KSTATE must know all preserved memory before kexec_file_load(), unless
we delay the loading of kexec segments (and the choice of their
destination addresses) until later, at the point of reboot to the new
kernel. So let's do that.

Signed-off-by: Andrey Ryabinin <arbn@yandex-team.com>
---
 include/linux/kexec.h   |   1 +
 kernel/kexec_core.c     |   6 ++
 kernel/kexec_file.c     | 144 ++++++++++++++++++++++++++--------------
 kernel/kexec_internal.h |   6 ++
 4 files changed, 108 insertions(+), 49 deletions(-)

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index bd82f04888a1..539aaacfd3fd 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -377,6 +377,7 @@ extern void machine_kexec(struct kimage *image);
 extern int machine_kexec_prepare(struct kimage *image);
 extern void machine_kexec_cleanup(struct kimage *image);
 extern int kernel_kexec(void);
+extern int kexec_file_load_segments(struct kimage *image);
 extern struct page *kimage_alloc_control_pages(struct kimage *image,
 						unsigned int order);
 
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 647ab5705c37..7c79addeb93b 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -1017,6 +1017,12 @@ int kernel_kexec(void)
 		goto Unlock;
 	}
 
+	if (kexec_late_load(kexec_image)) {
+		error = kexec_file_load_segments(kexec_image);
+		if (error)
+			goto Unlock;
+	}
+
 #ifdef CONFIG_KEXEC_JUMP
 	if (kexec_image->preserve_context) {
 		/*
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 8ecd34071bfa..634e2ed4cc4c 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -187,6 +187,34 @@ kimage_validate_signature(struct kimage *image)
 }
 #endif
 
+static int kimage_add_buffers(struct kimage *image)
+{
+	void *ldata;
+	int ret = 0;
+
+	/* IMA needs to pass the measurement list to the next kernel. */
+	ima_add_kexec_buffer(image);
+
+	ret = kstate_load_migrate_buf(image);
+	if (ret)
+		goto out;
+
+	/* Call image load handler */
+	ldata = kexec_image_load_default(image);
+
+	if (IS_ERR(ldata)) {
+		ret = PTR_ERR(ldata);
+		goto out;
+	}
+
+	image->image_loader_data = ldata;
+out:
+	/* In case of error, free up all allocated memory in this function */
+	if (ret)
+		kimage_file_post_load_cleanup(image);
+	return ret;
+
+}
 /*
  * In file mode list of segments is prepared by kernel. Copy relevant
  * data from user space, do error checking, prepare segment list
@@ -197,7 +225,6 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
 			     unsigned long cmdline_len, unsigned flags)
 {
 	ssize_t ret;
-	void *ldata;
 
 	ret = kernel_read_file_from_fd(kernel_fd, 0, &image->kernel_buf,
 				       KEXEC_FILE_SIZE_MAX, NULL,
@@ -251,22 +278,6 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
 				  image->cmdline_buf_len - 1);
 	}
 
-	/* IMA needs to pass the measurement list to the next kernel. */
-	ima_add_kexec_buffer(image);
-
-	ret = kstate_load_migrate_buf(image);
-	if (ret)
-		goto out;
-
-	/* Call image load handler */
-	ldata = kexec_image_load_default(image);
-
-	if (IS_ERR(ldata)) {
-		ret = PTR_ERR(ldata);
-		goto out;
-	}
-
-	image->image_loader_data = ldata;
 out:
 	/* In case of error, free up all allocated memory in this function */
 	if (ret)
@@ -303,10 +314,6 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
 	if (ret)
 		goto out_free_image;
 
-	ret = sanity_check_segment_list(image);
-	if (ret)
-		goto out_free_post_load_bufs;
-
 	ret = -ENOMEM;
 	image->control_code_page = kimage_alloc_control_pages(image,
 					   get_order(KEXEC_CONTROL_PAGE_SIZE));
@@ -334,6 +341,70 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
 	return ret;
 }
 
+static int kimage_post_load(struct kimage *image)
+{
+	int ret, i;
+
+	ret = kexec_calculate_store_digests(image);
+	if (ret)
+		goto out;
+
+	kexec_dprintk("nr_segments = %lu\n", image->nr_segments);
+	for (i = 0; i < image->nr_segments; i++) {
+		struct kexec_segment *ksegment;
+
+		ksegment = &image->segment[i];
+		kexec_dprintk("segment[%d]: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n",
+			      i, ksegment->buf, ksegment->bufsz, ksegment->mem,
+			      ksegment->memsz);
+
+		ret = kimage_load_segment(image, &image->segment[i]);
+		if (ret)
+			goto out;
+	}
+
+	kimage_terminate(image);
+
+	ret = machine_kexec_post_load(image);
+	if (ret)
+		goto out;
+
+	kexec_dprintk("kexec_file_load: type:%u, start:0x%lx head:0x%lx\n",
+		image->type, image->start, image->head);
+out:
+	return ret;
+}
+
+int kexec_file_load_segments(struct kimage *image)
+{
+	int ret;
+
+	ret = kimage_add_buffers(image);
+	if (ret) {
+		pr_err("failed to add kimage buffers %d\n", ret);
+		goto out;
+	}
+
+	ret = sanity_check_segment_list(image);
+	if (ret) {
+		pr_err("sanity check failed %d\n", ret);
+		goto out;
+	}
+
+	ret = kimage_post_load(image);
+	if (ret)
+		pr_err("kimage post load failed %d\n", ret);
+
+out:
+	/*
+	 * Free up any temporary buffers allocated which are not needed
+	 * after image has been loaded
+	 */
+	kimage_file_post_load_cleanup(image);
+
+	return ret;
+}
+
 SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
 		unsigned long, cmdline_len, const char __user *, cmdline_ptr,
 		unsigned long, flags)
@@ -341,7 +412,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
 	int image_type = (flags & KEXEC_FILE_ON_CRASH) ?
 			 KEXEC_TYPE_CRASH : KEXEC_TYPE_DEFAULT;
 	struct kimage **dest_image, *image;
-	int ret = 0, i;
+	int ret = 0;
 
 	/* We only trust the superuser with rebooting the system. */
 	if (!kexec_load_permitted(image_type))
@@ -398,37 +469,12 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
 	if (ret)
 		goto out;
 
-	ret = kexec_calculate_store_digests(image);
-	if (ret)
-		goto out;
-
-	kexec_dprintk("nr_segments = %lu\n", image->nr_segments);
-	for (i = 0; i < image->nr_segments; i++) {
-		struct kexec_segment *ksegment;
-
-		ksegment = &image->segment[i];
-		kexec_dprintk("segment[%d]: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n",
-			      i, ksegment->buf, ksegment->bufsz, ksegment->mem,
-			      ksegment->memsz);
-
-		ret = kimage_load_segment(image, &image->segment[i]);
+	if (!kexec_late_load(image)) {
+		ret = kexec_file_load_segments(image);
 		if (ret)
 			goto out;
 	}
 
-	kimage_terminate(image);
-
-	ret = machine_kexec_post_load(image);
-	if (ret)
-		goto out;
-
-	kexec_dprintk("kexec_file_load: type:%u, start:0x%lx head:0x%lx flags:0x%lx\n",
-		      image->type, image->start, image->head, flags);
-	/*
-	 * Free up any temporary buffers allocated which are not needed
-	 * after image has been loaded
-	 */
-	kimage_file_post_load_cleanup(image);
 exchange:
 	image = xchg(dest_image, image);
 out:
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
index 12e655a70e25..690b1c21b642 100644
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h
@@ -34,6 +34,12 @@ static inline void kexec_unlock(void)
 	atomic_set_release(&__kexec_lock, 0);
 }
 
+static inline bool kexec_late_load(struct kimage *image)
+{
+	return IS_ENABLED(CONFIG_KSTATE) && image->file_mode &&
+		(image->type == KEXEC_TYPE_DEFAULT);
+}
+
 #ifdef CONFIG_KEXEC_FILE
 #include <linux/purgatory.h>
 void kimage_file_post_load_cleanup(struct kimage *image);
-- 
2.45.3



  parent reply	other threads:[~2025-03-10 12:04 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-10 12:03 [PATCH v2 0/7] KSTATE: a mechanism to migrate some part of the kernel state across kexec Andrey Ryabinin
2025-03-10 12:03 ` [PATCH v2 1/7] kstate: Add kstate - a mechanism to describe and migrate " Andrey Ryabinin
2025-03-10 12:03 ` [PATCH v2 2/7] kstate, kexec, x86: transfer kstate data " Andrey Ryabinin
2025-03-10 12:03 ` [PATCH v2 3/7] kexec: exclude control pages from the destination addresses Andrey Ryabinin
2025-03-10 12:03 ` Andrey Ryabinin [this message]
2025-03-11 11:31   ` [PATCH v2 4/7] kexec, kstate: delay loading of kexec segments kernel test robot
2025-03-11 12:25   ` kernel test robot
2025-03-10 12:03 ` [PATCH v2 5/7] x86, kstate: Add the ability to preserve memory pages across kexec Andrey Ryabinin
2025-03-10 12:03 ` [PATCH v2 6/7] kexec, kstate: save kstate data before kexec'ing Andrey Ryabinin
2025-03-10 12:03 ` [PATCH v2 7/7] kstate, test: add test module for testing kstate subsystem Andrey Ryabinin
2025-03-11  2:27 ` [PATCH v2 0/7] KSTATE: a mechanism to migrate some part of the kernel state across kexec Cong Wang
2025-03-11 12:19   ` Andrey Ryabinin
2025-04-28 23:01     ` Chris Li
2025-04-28 23:01 ` Chris Li
2025-05-05 14:35   ` Andrey Ryabinin
2025-05-07  6:11     ` Chris Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250310120318.2124-5-arbn@yandex-team.com \
    --to=arbn@yandex-team.com \
    --cc=akpm@linux-foundation.org \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=ebiederm@xmission.com \
    --cc=graf@amazon.com \
    --cc=hpa@zytor.com \
    --cc=jgg@nvidia.com \
    --cc=jgowans@amazon.com \
    --cc=kexec@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mingo@redhat.com \
    --cc=pasha.tatashin@soleen.com \
    --cc=ptyadav@amazon.de \
    --cc=rientjes@google.com \
    --cc=rppt@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox