From: Rakie Kim <rakie.kim@sk.com>
To: gourry@gourry.net
Cc: akpm@linux-foundation.org, linux-mm@kvack.org,
linux-kernel@vger.kernel.org, linux-cxl@vger.kernel.org,
joshua.hahnjy@gmail.com, dan.j.williams@intel.com,
ying.huang@linux.alibaba.com, david@redhat.com,
Jonathan.Cameron@huawei.com, kernel_team@skhynix.com,
honggyu.kim@sk.com, yunjeong.mun@sk.com, rakie.kim@sk.com
Subject: [PATCH v5 3/3] mm/mempolicy: Support memory hotplug in weighted interleave
Date: Wed, 2 Apr 2025 10:49:04 +0900 [thread overview]
Message-ID: <20250402014906.1086-4-rakie.kim@sk.com> (raw)
In-Reply-To: <20250402014906.1086-1-rakie.kim@sk.com>
The weighted interleave policy distributes page allocations across multiple
NUMA nodes based on their performance weight, thereby improving memory
bandwidth utilization. The weight values for each node are configured
through sysfs.
Previously, sysfs entries for configuring weighted interleave were created
for all possible nodes (N_POSSIBLE) at initialization, including nodes that
might not have memory. However, not all nodes in N_POSSIBLE are usable at
runtime, as some may remain memoryless or offline.
This led to sysfs entries being created for unusable nodes, causing
potential misconfiguration issues.
To address this issue, this patch modifies the sysfs creation logic to:
1) Limit sysfs entries to nodes that are online and have memory, avoiding
the creation of sysfs entries for nodes that cannot be used.
2) Support memory hotplug by dynamically adding and removing sysfs entries
based on whether a node transitions into or out of the N_MEMORY state.
Additionally, the patch ensures that sysfs attributes are properly managed
when nodes go offline, preventing stale or redundant entries from persisting
in the system.
By making these changes, the weighted interleave policy now manages its
sysfs entries more efficiently, ensuring that only relevant nodes are
considered for interleaving, and dynamically adapting to memory hotplug
events.
Signed-off-by: Rakie Kim <rakie.kim@sk.com>
Signed-off-by: Honggyu Kim <honggyu.kim@sk.com>
Signed-off-by: Yunjeong Mun <yunjeong.mun@sk.com>
---
mm/mempolicy.c | 110 ++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 87 insertions(+), 23 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 3092a792bd28..ea4f3f3f2cdd 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -113,6 +113,7 @@
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <linux/uaccess.h>
+#include <linux/memory.h>
#include "internal.h"
@@ -3390,6 +3391,7 @@ struct iw_node_attr {
struct sysfs_wi_group {
struct kobject wi_kobj;
+ struct mutex kobj_lock;
struct iw_node_attr *nattrs[];
};
@@ -3439,13 +3441,24 @@ static ssize_t node_store(struct kobject *kobj, struct kobj_attribute *attr,
static void sysfs_wi_node_release(int nid)
{
- if (!wi_group->nattrs[nid])
+ struct iw_node_attr *attr;
+
+ if (nid < 0 || nid >= nr_node_ids)
+ return;
+
+ mutex_lock(&wi_group->kobj_lock);
+ attr = wi_group->nattrs[nid];
+ if (!attr) {
+ mutex_unlock(&wi_group->kobj_lock);
return;
+ }
+
+ wi_group->nattrs[nid] = NULL;
+ mutex_unlock(&wi_group->kobj_lock);
- sysfs_remove_file(&wi_group->wi_kobj,
- &wi_group->nattrs[nid]->kobj_attr.attr);
- kfree(wi_group->nattrs[nid]->kobj_attr.attr.name);
- kfree(wi_group->nattrs[nid]);
+ sysfs_remove_file(&wi_group->wi_kobj, &attr->kobj_attr.attr);
+ kfree(attr->kobj_attr.attr.name);
+ kfree(attr);
}
static void sysfs_wi_release(struct kobject *wi_kobj)
@@ -3464,35 +3477,81 @@ static const struct kobj_type wi_ktype = {
static int sysfs_wi_node_add(int nid)
{
- struct iw_node_attr *node_attr;
+ int ret = 0;
char *name;
+ struct iw_node_attr *new_attr = NULL;
- node_attr = kzalloc(sizeof(*node_attr), GFP_KERNEL);
- if (!node_attr)
+ if (nid < 0 || nid >= nr_node_ids) {
+ pr_err("Invalid node id: %d\n", nid);
+ return -EINVAL;
+ }
+
+ new_attr = kzalloc(sizeof(struct iw_node_attr), GFP_KERNEL);
+ if (!new_attr)
return -ENOMEM;
name = kasprintf(GFP_KERNEL, "node%d", nid);
if (!name) {
- kfree(node_attr);
+ kfree(new_attr);
return -ENOMEM;
}
- sysfs_attr_init(&node_attr->kobj_attr.attr);
- node_attr->kobj_attr.attr.name = name;
- node_attr->kobj_attr.attr.mode = 0644;
- node_attr->kobj_attr.show = node_show;
- node_attr->kobj_attr.store = node_store;
- node_attr->nid = nid;
+ mutex_lock(&wi_group->kobj_lock);
+ if (wi_group->nattrs[nid]) {
+ mutex_unlock(&wi_group->kobj_lock);
+ pr_info("Node [%d] already exists\n", nid);
+ kfree(new_attr);
+ kfree(name);
+ return 0;
+ }
+ wi_group->nattrs[nid] = new_attr;
- if (sysfs_create_file(&wi_group->wi_kobj, &node_attr->kobj_attr.attr)) {
- kfree(node_attr->kobj_attr.attr.name);
- kfree(node_attr);
- pr_err("failed to add attribute to weighted_interleave\n");
- return -ENOMEM;
+ sysfs_attr_init(&wi_group->nattrs[nid]->kobj_attr.attr);
+ wi_group->nattrs[nid]->kobj_attr.attr.name = name;
+ wi_group->nattrs[nid]->kobj_attr.attr.mode = 0644;
+ wi_group->nattrs[nid]->kobj_attr.show = node_show;
+ wi_group->nattrs[nid]->kobj_attr.store = node_store;
+ wi_group->nattrs[nid]->nid = nid;
+
+ ret = sysfs_create_file(&wi_group->wi_kobj,
+ &wi_group->nattrs[nid]->kobj_attr.attr);
+ if (ret) {
+ kfree(wi_group->nattrs[nid]->kobj_attr.attr.name);
+ kfree(wi_group->nattrs[nid]);
+ wi_group->nattrs[nid] = NULL;
+ pr_err("Failed to add attribute to weighted_interleave: %d\n", ret);
}
+ mutex_unlock(&wi_group->kobj_lock);
- wi_group->nattrs[nid] = node_attr;
- return 0;
+ return ret;
+}
+
+static int wi_node_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ int err;
+ struct memory_notify *arg = data;
+ int nid = arg->status_change_nid;
+
+ if (nid < 0)
+ goto notifier_end;
+
+ switch(action) {
+ case MEM_ONLINE:
+ err = sysfs_wi_node_add(nid);
+ if (err) {
+ pr_err("failed to add sysfs [node%d]\n", nid);
+ return NOTIFY_BAD;
+ }
+ break;
+ case MEM_OFFLINE:
+ if (!node_state(nid, N_MEMORY))
+ sysfs_wi_node_release(nid);
+ break;
+ }
+
+notifier_end:
+ return NOTIFY_OK;
}
static int add_weighted_interleave_group(struct kobject *mempolicy_kobj)
@@ -3504,13 +3563,17 @@ static int add_weighted_interleave_group(struct kobject *mempolicy_kobj)
GFP_KERNEL);
if (!wi_group)
return -ENOMEM;
+ mutex_init(&wi_group->kobj_lock);
err = kobject_init_and_add(&wi_group->wi_kobj, &wi_ktype, mempolicy_kobj,
"weighted_interleave");
if (err)
goto err_out;
- for_each_node_state(nid, N_POSSIBLE) {
+ for_each_online_node(nid) {
+ if (!node_state(nid, N_MEMORY))
+ continue;
+
err = sysfs_wi_node_add(nid);
if (err) {
pr_err("failed to add sysfs [node%d]\n", nid);
@@ -3518,6 +3581,7 @@ static int add_weighted_interleave_group(struct kobject *mempolicy_kobj)
}
}
+ hotplug_memory_notifier(wi_node_notifier, DEFAULT_CALLBACK_PRI);
return 0;
err_out:
--
2.34.1
next prev parent reply other threads:[~2025-04-02 1:50 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-02 1:49 [PATCH v5 0/3] Enhance sysfs handling for " Rakie Kim
2025-04-02 1:49 ` [PATCH v5 1/3] mm/mempolicy: Fix memory leaks in weighted interleave sysfs Rakie Kim
2025-04-02 1:49 ` [PATCH v5 2/3] mm/mempolicy: Support dynamic sysfs updates for weighted interleave Rakie Kim
2025-04-02 1:49 ` Rakie Kim [this message]
2025-04-02 4:18 ` [PATCH v5 3/3] mm/mempolicy: Support memory hotplug in " Honggyu Kim
2025-04-02 4:34 ` Andrew Morton
2025-04-02 5:15 ` Gregory Price
2025-04-02 5:23 ` Gregory Price
2025-04-02 5:32 ` Honggyu Kim
2025-04-02 9:51 ` Oscar Salvador
2025-04-02 14:14 ` Gregory Price
2025-04-02 15:41 ` Oscar Salvador
2025-04-02 16:36 ` Gregory Price
2025-04-03 4:26 ` Rakie Kim
2025-04-03 5:45 ` Rakie Kim
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250402014906.1086-4-rakie.kim@sk.com \
--to=rakie.kim@sk.com \
--cc=Jonathan.Cameron@huawei.com \
--cc=akpm@linux-foundation.org \
--cc=dan.j.williams@intel.com \
--cc=david@redhat.com \
--cc=gourry@gourry.net \
--cc=honggyu.kim@sk.com \
--cc=joshua.hahnjy@gmail.com \
--cc=kernel_team@skhynix.com \
--cc=linux-cxl@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=ying.huang@linux.alibaba.com \
--cc=yunjeong.mun@sk.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox