linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Gregory Price <gourry@gourry.net>
To: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org, kernel-team@meta.com,
	david@redhat.com, osalvador@suse.de, gregkh@linuxfoundation.org,
	rafael@kernel.org, dakr@kernel.org, akpm@linux-foundation.org,
	lorenzo.stoakes@oracle.com, Liam.Howlett@oracle.com,
	vbabka@suse.cz, rppt@kernel.org, surenb@google.com,
	mhocko@suse.com, hare@suse.de
Subject: [RFC PATCH] memory,memory_hotplug: allow restricting memory blocks to zone movable
Date: Mon,  5 Jan 2026 15:36:11 -0500	[thread overview]
Message-ID: <20260105203611.4079743-1-gourry@gourry.net> (raw)

It was reported (LPC 2025) that userland services which monitor memory
blocks can cause hot-unplug to fail permanently.

This can occur when drivers attempt to hot-remove memory in two phases
(offline, remove), while a userland service detects the memory offline
and re-onlines the memory into a zone which may prevent removal.

This patch allows a driver to specify that a given memory block is
intended as ZONE_MOVABLE memory only (i.e. the system should try to
protect its hot-unpluggability). This is done via an MHP flag and a new
"movable_only" bool in `struct memory_block`.

Attempts to online a memory block that has movable_only=true with any
online type other than MMOP_ONLINE_MOVABLE will fail with -EINVAL.

It is hard to catch every possible way an offline/remove sequence can be
implemented, so a race can clearly still occur if the userland service
onlines the memory back into ZONE_MOVABLE, but that at least will not
prevent the removal of the block at a later time.

Suggested-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Gregory Price <gourry@gourry.net>
---
 drivers/base/memory.c          | 15 +++++++++++----
 include/linux/memory.h         |  4 +++-
 include/linux/memory_hotplug.h | 13 +++++++++++++
 mm/memory_hotplug.c            | 12 +++++++++---
 4 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 6d84a02cfa5d..59512e4b8d62 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -374,6 +374,8 @@ static int memory_block_change_state(struct memory_block *mem,
 
 	if (to_state == MEM_OFFLINE)
 		mem->state = MEM_GOING_OFFLINE;
+	else if (mem->movable_only && to_state != MMOP_ONLINE_MOVABLE)
+		return -EINVAL;
 
 	ret = memory_block_action(mem, to_state);
 	mem->state = ret ? from_state_req : to_state;
@@ -811,7 +813,8 @@ void memory_block_add_nid_early(struct memory_block *mem, int nid)
 
 static int add_memory_block(unsigned long block_id, int nid, unsigned long state,
 			    struct vmem_altmap *altmap,
-			    struct memory_group *group)
+			    struct memory_group *group,
+			    bool movable_only)
 {
 	struct memory_block *mem;
 	int ret = 0;
@@ -829,6 +832,7 @@ static int add_memory_block(unsigned long block_id, int nid, unsigned long state
 	mem->state = state;
 	mem->nid = nid;
 	mem->altmap = altmap;
+	mem->movable_only = movable_only;
 	INIT_LIST_HEAD(&mem->group_next);
 
 #ifndef CONFIG_NUMA
@@ -880,7 +884,8 @@ static void remove_memory_block(struct memory_block *memory)
  */
 int create_memory_block_devices(unsigned long start, unsigned long size,
 				int nid, struct vmem_altmap *altmap,
-				struct memory_group *group)
+				struct memory_group *group,
+				bool movable_only)
 {
 	const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
 	unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
@@ -893,7 +898,8 @@ int create_memory_block_devices(unsigned long start, unsigned long size,
 		return -EINVAL;
 
 	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
-		ret = add_memory_block(block_id, nid, MEM_OFFLINE, altmap, group);
+		ret = add_memory_block(block_id, nid, MEM_OFFLINE, altmap, group,
+				       movable_only);
 		if (ret)
 			break;
 	}
@@ -998,7 +1004,8 @@ void __init memory_dev_init(void)
 			continue;
 
 		block_id = memory_block_id(nr);
-		ret = add_memory_block(block_id, NUMA_NO_NODE, MEM_ONLINE, NULL, NULL);
+		ret = add_memory_block(block_id, NUMA_NO_NODE, MEM_ONLINE, NULL, NULL,
+				       false);
 		if (ret) {
 			panic("%s() failed to add memory block: %d\n",
 			      __func__, ret);
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 43d378038ce2..bab24f796d3d 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -80,6 +80,7 @@ struct memory_block {
 	struct vmem_altmap *altmap;
 	struct memory_group *group;	/* group (if any) for this block */
 	struct list_head group_next;	/* next block inside memory group */
+	bool movable_only;		/* If set, only ZONE_MOVABLE is valid */
 #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
 	atomic_long_t nr_hwpoison;
 #endif
@@ -160,7 +161,8 @@ extern int register_memory_notifier(struct notifier_block *nb);
 extern void unregister_memory_notifier(struct notifier_block *nb);
 int create_memory_block_devices(unsigned long start, unsigned long size,
 				int nid, struct vmem_altmap *altmap,
-				struct memory_group *group);
+				struct memory_group *group,
+				bool movable_only);
 void remove_memory_block_devices(unsigned long start, unsigned long size);
 extern void memory_dev_init(void);
 extern int memory_notify(unsigned long val, void *v);
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 23f038a16231..ca51ef2ad0cf 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -75,6 +75,19 @@ typedef int __bitwise mhp_t;
  */
 #define MHP_OFFLINE_INACCESSIBLE	((__force mhp_t)BIT(3))
 
+/*
+ * Restrict hotplugged memory blocks to ZONE_MOVABLE only.
+ *
+ * During offlining of hotplugged memory which was originally onlined
+ * as ZONE_MOVABLE, userland services may detect blocks going offline
+ * and automatically re-online them into ZONE_NORMAL or lower.  When
+ * this happens the memory may become permanently impossible to remove.
+ *
+ * Allow driver-managed memory sources to restrict memory blocks to
+ * ZONE_MOVABLE only, so that the truly degenerate case can be mitigated.
+ */
+#define MHP_MOVABLE_ONLY		((__force mhp_t)BIT(4))
+
 /*
  * Extended parameters for memory hotplug:
  * altmap: alternative allocator for memmap array (optional)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 81ba5b019926..1a184bfd87f6 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1346,7 +1346,9 @@ static int check_hotplug_memory_range(u64 start, u64 size)
 
 static int online_memory_block(struct memory_block *mem, void *arg)
 {
-	mem->online_type = mhp_get_default_online_type();
+	mem->online_type = mem->movable_only ?
+			   MMOP_ONLINE_MOVABLE :
+			   mhp_get_default_online_type();
 	return device_online(&mem->dev);
 }
 
@@ -1449,6 +1451,7 @@ static int create_altmaps_and_memory_blocks(int nid, struct memory_group *group,
 	unsigned long memblock_size = memory_block_size_bytes();
 	u64 cur_start;
 	int ret;
+	bool movable_only = mhp_flags & MHP_MOVABLE_ONLY;
 
 	for (cur_start = start; cur_start < start + size;
 	     cur_start += memblock_size) {
@@ -1478,7 +1481,8 @@ static int create_altmaps_and_memory_blocks(int nid, struct memory_group *group,
 
 		/* create memory block devices after memory was added */
 		ret = create_memory_block_devices(cur_start, memblock_size, nid,
-						  params.altmap, group);
+						  params.altmap, group,
+						  movable_only);
 		if (ret) {
 			arch_remove_memory(cur_start, memblock_size, NULL);
 			kfree(params.altmap);
@@ -1506,6 +1510,7 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 	struct memory_group *group = NULL;
 	u64 start, size;
 	bool new_node = false;
+	bool movable_only = mhp_flags & MHP_MOVABLE_ONLY;
 	int ret;
 
 	start = res->start;
@@ -1564,7 +1569,8 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 			goto error;
 
 		/* create memory block devices after memory was added */
-		ret = create_memory_block_devices(start, size, nid, NULL, group);
+		ret = create_memory_block_devices(start, size, nid, NULL, group,
+						  movable_only);
 		if (ret) {
 			arch_remove_memory(start, size, params.altmap);
 			goto error;
-- 
2.52.0



             reply	other threads:[~2026-01-05 20:36 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-05 20:36 Gregory Price [this message]
2026-01-06 15:05 ` Michal Hocko
2026-01-06 16:53   ` Gregory Price
2026-01-06 19:49     ` Michal Hocko
2026-01-07 12:47       ` Hannes Reinecke
2026-01-07 17:17         ` Michal Hocko
2026-01-07 15:09       ` David Hildenbrand (Red Hat)
2026-01-07 16:00         ` Gregory Price
2026-01-07 17:19         ` Michal Hocko
2026-01-06 15:24 ` David Hildenbrand (Red Hat)
2026-01-06 16:58   ` Gregory Price
2026-01-06 17:52     ` David Hildenbrand (Red Hat)
2026-01-06 18:06       ` Gregory Price
2026-01-06 18:38         ` David Hildenbrand (Red Hat)
2026-01-06 19:59           ` Gregory Price
2026-01-06 20:22             ` David Hildenbrand (Red Hat)
2026-01-08  7:31               ` Hannes Reinecke
2026-01-08 14:16                 ` David Hildenbrand (Red Hat)
2026-01-08  7:21         ` Hannes Reinecke
2026-01-08  7:22         ` Hannes Reinecke

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260105203611.4079743-1-gourry@gourry.net \
    --to=gourry@gourry.net \
    --cc=Liam.Howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=dakr@kernel.org \
    --cc=david@redhat.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=hare@suse.de \
    --cc=kernel-team@meta.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=mhocko@suse.com \
    --cc=osalvador@suse.de \
    --cc=rafael@kernel.org \
    --cc=rppt@kernel.org \
    --cc=surenb@google.com \
    --cc=vbabka@suse.cz \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox