linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Pratyush Yadav <pratyush@kernel.org>
To: "Alexander Graf" <graf@amazon.com>,
	"Mike Rapoport" <rppt@kernel.org>,
	"Changyuan Lyu" <changyuanl@google.com>,
	"Andrew Morton" <akpm@linux-foundation.org>,
	"Baoquan He" <bhe@redhat.com>,
	"Pratyush Yadav" <pratyush@kernel.org>,
	"Pasha Tatashin" <pasha.tatashin@soleen.com>,
	"Jason Gunthorpe" <jgg@nvidia.com>,
	"Thomas Weißschuh" <thomas.weissschuh@linutronix.de>,
	"Chris Li" <chrisl@kernel.org>, "Jason Miu" <jasonmiu@google.com>,
	"David Matlack" <dmatlack@google.com>,
	"David Rientjes" <rientjes@google.com>
Cc: linux-kernel@vger.kernel.org, kexec@lists.infradead.org,
	linux-mm@kvack.org
Subject: [RFC PATCH 2/4] kho: use KHO array for preserved memory bitmap serialization
Date: Tue,  9 Sep 2025 16:44:22 +0200	[thread overview]
Message-ID: <20250909144426.33274-3-pratyush@kernel.org> (raw)
In-Reply-To: <20250909144426.33274-1-pratyush@kernel.org>

The preserved memory bitmap serialization creates a linked list of pages
to track the bitmaps for preserved memory. Essentially, it is a
scattered list of pointers grouped by folio order. Use a KHO array to
hold the pointers to the bitmaps instead. This moves the burden of
tracking this metadata to the KHO array layer, and makes the KHO core
simpler.

Currently, the bitmaps are held in chunks, each of which is a fixed-size
array of pointers plus some metadata, including the order of the
preserved folios. The KHO array holds only pointers and has no mechanism
for grouping. To make the serialization format simpler, move the folio
order from struct khoser_mem_chunk to struct khoser_mem_bitmap_ptr.

The chunks to hold the bitmaps are not KHO-preserved since they are only
used during the scratch-only phase. The same holds true for the KHO
array. The pages which track the KHO array metadata are not
KHO-preserved and thus are only valid during the scratch phase of the
next kernel. After that, they are discarded and freed to buddy.

Signed-off-by: Pratyush Yadav <pratyush@kernel.org>
---

The diff is a bit hard to read. The final result can be found at
https://git.kernel.org/pub/scm/linux/kernel/git/pratyush/linux.git/tree/kernel/kexec_handover.c?h=kho-array-rfc-v1#n227

 kernel/kexec_handover.c | 148 +++++++++++++++++++---------------------
 1 file changed, 69 insertions(+), 79 deletions(-)

diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c
index ecd1ac210dbd7..26f9f5295f07d 100644
--- a/kernel/kexec_handover.c
+++ b/kernel/kexec_handover.c
@@ -18,6 +18,7 @@
 #include <linux/memblock.h>
 #include <linux/notifier.h>
 #include <linux/page-isolation.h>
+#include <linux/kho_array.h>
 
 #include <asm/early_ioremap.h>
 
@@ -80,15 +81,13 @@ struct kho_mem_track {
 	struct xarray orders;
 };
 
-struct khoser_mem_chunk;
-
 struct kho_serialization {
 	struct page *fdt;
 	struct list_head fdt_list;
 	struct dentry *sub_fdt_dir;
 	struct kho_mem_track track;
-	/* First chunk of serialized preserved memory map */
-	struct khoser_mem_chunk *preserved_mem_map;
+	/* Serialized preserved memory map */
+	struct kho_array *preserved_mem_map;
 };
 
 static void *xa_load_or_alloc(struct xarray *xa, unsigned long index, size_t sz)
@@ -226,11 +225,11 @@ EXPORT_SYMBOL_GPL(kho_restore_folio);
 
 /* Serialize and deserialize struct kho_mem_phys across kexec
  *
- * Record all the bitmaps in a linked list of pages for the next kernel to
- * process. Each chunk holds bitmaps of the same order and each block of bitmaps
- * starts at a given physical address. This allows the bitmaps to be sparse. The
- * xarray is used to store them in a tree while building up the data structure,
- * but the KHO successor kernel only needs to process them once in order.
+ * Record all the bitmaps in a KHO array for the next kernel to process. Each
+ * bitmap stores the order of the folios and starts at a given physical address.
+ * This allows the bitmaps to be sparse. The xarray is used to store them in a
+ * tree while building up the data structure, but the KHO successor kernel only
+ * needs to process them once in order.
  *
  * All of this memory is normal kmalloc() memory and is not marked for
  * preservation. The successor kernel will remain isolated to the scratch space
@@ -240,118 +239,107 @@ EXPORT_SYMBOL_GPL(kho_restore_folio);
 
 struct khoser_mem_bitmap_ptr {
 	phys_addr_t phys_start;
-	DECLARE_KHOSER_PTR(bitmap, struct kho_mem_phys_bits *);
-};
-
-struct khoser_mem_chunk_hdr {
-	DECLARE_KHOSER_PTR(next, struct khoser_mem_chunk *);
 	unsigned int order;
-	unsigned int num_elms;
-};
-
-#define KHOSER_BITMAP_SIZE                                   \
-	((PAGE_SIZE - sizeof(struct khoser_mem_chunk_hdr)) / \
-	 sizeof(struct khoser_mem_bitmap_ptr))
-
-struct khoser_mem_chunk {
-	struct khoser_mem_chunk_hdr hdr;
-	struct khoser_mem_bitmap_ptr bitmaps[KHOSER_BITMAP_SIZE];
+	unsigned int __reserved;
+	DECLARE_KHOSER_PTR(bitmap, struct kho_mem_phys_bits *);
 };
 
-static_assert(sizeof(struct khoser_mem_chunk) == PAGE_SIZE);
-
-static struct khoser_mem_chunk *new_chunk(struct khoser_mem_chunk *cur_chunk,
-					  unsigned long order)
+static struct khoser_mem_bitmap_ptr *new_bitmap(phys_addr_t start,
+						struct kho_mem_phys_bits *bits,
+						unsigned int order)
 {
-	struct khoser_mem_chunk *chunk;
+	struct khoser_mem_bitmap_ptr *bitmap;
 
-	chunk = kzalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!chunk)
+	bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
+	if (!bitmap)
 		return NULL;
-	chunk->hdr.order = order;
-	if (cur_chunk)
-		KHOSER_STORE_PTR(cur_chunk->hdr.next, chunk);
-	return chunk;
+
+	bitmap->phys_start = start;
+	bitmap->order = order;
+	KHOSER_STORE_PTR(bitmap->bitmap, bits);
+	return bitmap;
 }
 
-static void kho_mem_ser_free(struct khoser_mem_chunk *first_chunk)
+static void kho_mem_ser_free(struct kho_array *ka)
 {
-	struct khoser_mem_chunk *chunk = first_chunk;
+	struct khoser_mem_bitmap_ptr *elm;
+	struct ka_iter iter;
 
-	while (chunk) {
-		struct khoser_mem_chunk *tmp = chunk;
+	if (!ka)
+		return;
 
-		chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
-		kfree(tmp);
-	}
+	ka_iter_init_read(&iter, ka);
+	ka_iter_for_each(&iter, elm)
+		kfree(elm);
+
+	kho_array_destroy(ka);
+	kfree(ka);
 }
 
 static int kho_mem_serialize(struct kho_serialization *ser)
 {
-	struct khoser_mem_chunk *first_chunk = NULL;
-	struct khoser_mem_chunk *chunk = NULL;
 	struct kho_mem_phys *physxa;
-	unsigned long order;
+	unsigned long order, pos = 0;
+	struct kho_array *ka = NULL;
+	struct ka_iter iter;
+
+	ka = kzalloc(sizeof(*ka), GFP_KERNEL);
+	if (!ka)
+		return -ENOMEM;
+	ka_iter_init_write(&iter, ka);
 
 	xa_for_each(&ser->track.orders, order, physxa) {
 		struct kho_mem_phys_bits *bits;
 		unsigned long phys;
 
-		chunk = new_chunk(chunk, order);
-		if (!chunk)
-			goto err_free;
-
-		if (!first_chunk)
-			first_chunk = chunk;
-
 		xa_for_each(&physxa->phys_bits, phys, bits) {
 			struct khoser_mem_bitmap_ptr *elm;
+			phys_addr_t start;
+
+			start = (phys * PRESERVE_BITS) << (order + PAGE_SHIFT);
+			elm = new_bitmap(start, bits, order);
+			if (!elm)
+				goto err_free;
 
-			if (chunk->hdr.num_elms == ARRAY_SIZE(chunk->bitmaps)) {
-				chunk = new_chunk(chunk, order);
-				if (!chunk)
-					goto err_free;
-			}
-
-			elm = &chunk->bitmaps[chunk->hdr.num_elms];
-			chunk->hdr.num_elms++;
-			elm->phys_start = (phys * PRESERVE_BITS)
-					  << (order + PAGE_SHIFT);
-			KHOSER_STORE_PTR(elm->bitmap, bits);
+			ka_iter_setpos(&iter, pos);
+			if (ka_iter_setentry(&iter, elm))
+				goto err_free;
+			pos++;
 		}
 	}
 
-	ser->preserved_mem_map = first_chunk;
+	ser->preserved_mem_map = ka;
 
 	return 0;
 
 err_free:
-	kho_mem_ser_free(first_chunk);
+	kho_mem_ser_free(ka);
 	return -ENOMEM;
 }
 
-static void __init deserialize_bitmap(unsigned int order,
-				      struct khoser_mem_bitmap_ptr *elm)
+static void __init deserialize_bitmap(struct khoser_mem_bitmap_ptr *elm)
 {
 	struct kho_mem_phys_bits *bitmap = KHOSER_LOAD_PTR(elm->bitmap);
 	unsigned long bit;
 
 	for_each_set_bit(bit, bitmap->preserve, PRESERVE_BITS) {
-		int sz = 1 << (order + PAGE_SHIFT);
+		int sz = 1 << (elm->order + PAGE_SHIFT);
 		phys_addr_t phys =
-			elm->phys_start + (bit << (order + PAGE_SHIFT));
+			elm->phys_start + (bit << (elm->order + PAGE_SHIFT));
 		struct page *page = phys_to_page(phys);
 
 		memblock_reserve(phys, sz);
 		memblock_reserved_mark_noinit(phys, sz);
-		page->private = order;
+		page->private = elm->order;
 	}
 }
 
 static void __init kho_mem_deserialize(const void *fdt)
 {
-	struct khoser_mem_chunk *chunk;
+	struct khoser_mem_bitmap_ptr *elm;
 	const phys_addr_t *mem;
+	struct kho_array *ka;
+	struct ka_iter iter;
 	int len;
 
 	mem = fdt_getprop(fdt, 0, PROP_PRESERVED_MEMORY_MAP, &len);
@@ -361,15 +349,17 @@ static void __init kho_mem_deserialize(const void *fdt)
 		return;
 	}
 
-	chunk = *mem ? phys_to_virt(*mem) : NULL;
-	while (chunk) {
-		unsigned int i;
-
-		for (i = 0; i != chunk->hdr.num_elms; i++)
-			deserialize_bitmap(chunk->hdr.order,
-					   &chunk->bitmaps[i]);
-		chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
+	ka = *mem ? phys_to_virt(*mem) : NULL;
+	if (!ka)
+		return;
+	if (!kho_array_valid(ka)) {
+		pr_err("invalid KHO array for preserved memory bitmaps\n");
+		return;
 	}
+
+	ka_iter_init_read(&iter, ka);
+	ka_iter_for_each(&iter, elm)
+		deserialize_bitmap(elm);
 }
 
 /*
-- 
2.47.3



  parent reply	other threads:[~2025-09-09 14:44 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-09 14:44 [RFC PATCH 0/4] kho: introduce the KHO array Pratyush Yadav
2025-09-09 14:44 ` [RFC PATCH 1/4] " Pratyush Yadav
2025-09-09 15:28   ` Jason Gunthorpe
2025-09-09 15:40     ` Pratyush Yadav
2025-09-09 15:50       ` Jason Gunthorpe
2025-09-09 14:44 ` Pratyush Yadav [this message]
2025-09-09 14:44 ` [RFC PATCH 3/4] kho: add support for preserving vmalloc allocations Pratyush Yadav
2025-09-09 14:44 ` [RFC PATCH 4/4] lib/test_kho: use kho_preserve_vmalloc instead of storing addresses in fdt Pratyush Yadav

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250909144426.33274-3-pratyush@kernel.org \
    --to=pratyush@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=bhe@redhat.com \
    --cc=changyuanl@google.com \
    --cc=chrisl@kernel.org \
    --cc=dmatlack@google.com \
    --cc=graf@amazon.com \
    --cc=jasonmiu@google.com \
    --cc=jgg@nvidia.com \
    --cc=kexec@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=pasha.tatashin@soleen.com \
    --cc=rientjes@google.com \
    --cc=rppt@kernel.org \
    --cc=thomas.weissschuh@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox