linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
To: linux-mm@kvack.org, akpm@linux-foundation.org
Cc: Wei Xu <weixugc@google.com>, Huang Ying <ying.huang@intel.com>,
	Greg Thelen <gthelen@google.com>, Yang Shi <shy828301@gmail.com>,
	Davidlohr Bueso <dave@stgolabs.net>,
	Tim C Chen <tim.c.chen@intel.com>,
	Brice Goglin <brice.goglin@gmail.com>,
	Michal Hocko <mhocko@kernel.org>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Hesham Almatary <hesham.almatary@huawei.com>,
	Dave Hansen <dave.hansen@intel.com>,
	Jonathan Cameron <Jonathan.Cameron@huawei.com>,
	Alistair Popple <apopple@nvidia.com>,
	Dan Williams <dan.j.williams@intel.com>,
	Feng Tang <feng.tang@intel.com>,
	Jagdish Gediya <jvgediya@linux.ibm.com>,
	Baolin Wang <baolin.wang@linux.alibaba.com>,
	David Rientjes <rientjes@google.com>,
	"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Subject: [PATCH v5 2/9] mm/demotion: Expose per node memory tier to sysfs
Date: Fri,  3 Jun 2022 19:12:30 +0530	[thread overview]
Message-ID: <20220603134237.131362-3-aneesh.kumar@linux.ibm.com> (raw)
In-Reply-To: <20220603134237.131362-1-aneesh.kumar@linux.ibm.com>

Add support to modify the memory tier for a NUMA node.

/sys/devices/system/node/nodeN/memtier

where N = node id

When read, It list the memory tier that the node belongs to.

When written, the kernel moves the node into the specified
memory tier, the tier assignment of all other nodes are not
affected.

If the memory tier does not exist, writing to the above file
creates the tier and assign the NUMA node to that tier.

mutex memory_tier_lock is introduced to protect memory tier
related chanegs as it can happen from sysfs as well on hot
plug events.

Suggested-by: Wei Xu <weixugc@google.com>
Signed-off-by: Jagdish Gediya <jvgediya@linux.ibm.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 drivers/base/node.c          |  39 +++++++++++
 include/linux/memory-tiers.h |   3 +
 mm/memory-tiers.c            | 123 ++++++++++++++++++++++++++++++++++-
 3 files changed, 164 insertions(+), 1 deletion(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 0ac6376ef7a1..599ed64d910f 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -20,6 +20,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
+#include <linux/memory-tiers.h>
 
 static struct bus_type node_subsys = {
 	.name = "node",
@@ -560,11 +561,49 @@ static ssize_t node_read_distance(struct device *dev,
 }
 static DEVICE_ATTR(distance, 0444, node_read_distance, NULL);
 
+#ifdef CONFIG_TIERED_MEMORY
+static ssize_t memtier_show(struct device *dev,
+			    struct device_attribute *attr,
+			    char *buf)
+{
+	int node = dev->id;
+	int tier_index = node_get_memory_tier_id(node);
+
+	if (tier_index != -1)
+		return sysfs_emit(buf, "%d\n", tier_index);
+	return 0;
+}
+
+static ssize_t memtier_store(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	unsigned long tier;
+	int node = dev->id;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &tier);
+	if (ret)
+		return ret;
+
+	ret = node_reset_memory_tier(node, tier);
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+static DEVICE_ATTR_RW(memtier);
+#endif
+
 static struct attribute *node_dev_attrs[] = {
 	&dev_attr_meminfo.attr,
 	&dev_attr_numastat.attr,
 	&dev_attr_distance.attr,
 	&dev_attr_vmstat.attr,
+#ifdef CONFIG_TIERED_MEMORY
+	&dev_attr_memtier.attr,
+#endif
 	NULL
 };
 
diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index e17f6b4ee177..91f071804476 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -15,6 +15,9 @@
 #define DEFAULT_MEMORY_TIER	MEMORY_TIER_DRAM
 #define MAX_MEMORY_TIERS  3
 
+int node_get_memory_tier_id(int node);
+int node_set_memory_tier(int node, int tier);
+int node_reset_memory_tier(int node, int tier);
 #endif	/* CONFIG_TIERED_MEMORY */
 
 #endif
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 7de18d94a08d..9c78c47ad030 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -126,7 +126,6 @@ static struct memory_tier *register_memory_tier(unsigned int tier)
 	return memtier;
 }
 
-__maybe_unused // temporay to prevent warnings during bisects
 static void unregister_memory_tier(struct memory_tier *memtier)
 {
 	list_del(&memtier->list);
@@ -162,6 +161,128 @@ static const struct attribute_group *memory_tier_attr_groups[] = {
 	NULL,
 };
 
+static struct memory_tier *__node_get_memory_tier(int node)
+{
+	struct memory_tier *memtier;
+
+	list_for_each_entry(memtier, &memory_tiers, list) {
+		if (node_isset(node, memtier->nodelist))
+			return memtier;
+	}
+	return NULL;
+}
+
+static struct memory_tier *__get_memory_tier_from_id(int id)
+{
+	struct memory_tier *memtier;
+
+	list_for_each_entry(memtier, &memory_tiers, list) {
+		if (memtier->dev.id == id)
+			return memtier;
+	}
+	return NULL;
+}
+
+__maybe_unused // temporay to prevent warnings during bisects
+static void node_remove_from_memory_tier(int node)
+{
+	struct memory_tier *memtier;
+
+	mutex_lock(&memory_tier_lock);
+
+	memtier = __node_get_memory_tier(node);
+	if (!memtier)
+		goto out;
+	/*
+	 * Remove node from tier, if tier becomes
+	 * empty then unregister it to make it invisible
+	 * in sysfs.
+	 */
+	node_clear(node, memtier->nodelist);
+	if (nodes_empty(memtier->nodelist))
+		unregister_memory_tier(memtier);
+
+out:
+	mutex_unlock(&memory_tier_lock);
+}
+
+int node_get_memory_tier_id(int node)
+{
+	int tier = -1;
+	struct memory_tier *memtier;
+	/*
+	 * Make sure memory tier is not unregistered
+	 * while it is being read.
+	 */
+	mutex_lock(&memory_tier_lock);
+	memtier = __node_get_memory_tier(node);
+	if (memtier)
+		tier = memtier->dev.id;
+	mutex_unlock(&memory_tier_lock);
+
+	return tier;
+}
+
+static int __node_set_memory_tier(int node, int tier)
+{
+	int ret = 0;
+	struct memory_tier *memtier;
+
+	memtier = __get_memory_tier_from_id(tier);
+	if (!memtier) {
+		memtier = register_memory_tier(tier);
+		if (!memtier) {
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+	node_set(node, memtier->nodelist);
+out:
+	return ret;
+}
+
+int node_reset_memory_tier(int node, int tier)
+{
+	struct memory_tier *current_tier;
+	int ret = 0;
+
+	mutex_lock(&memory_tier_lock);
+
+	current_tier = __node_get_memory_tier(node);
+	if (!current_tier || current_tier->dev.id == tier)
+		goto out;
+
+	node_clear(node, current_tier->nodelist);
+
+	ret = __node_set_memory_tier(node, tier);
+	if (ret) {
+		/* reset it back to older tier */
+		node_set(node, current_tier->nodelist);
+		goto out;
+	}
+
+	if (nodes_empty(current_tier->nodelist))
+		unregister_memory_tier(current_tier);
+out:
+	mutex_unlock(&memory_tier_lock);
+
+	return ret;
+}
+
+int node_set_memory_tier(int node, int tier)
+{
+	struct memory_tier *memtier;
+	int ret = 0;
+
+	mutex_lock(&memory_tier_lock);
+	memtier = __node_get_memory_tier(node);
+	if (!memtier)
+		ret = __node_set_memory_tier(node, tier);
+	mutex_unlock(&memory_tier_lock);
+
+	return ret;
+}
+
 static int __init memory_tier_init(void)
 {
 	int ret;
-- 
2.36.1



  parent reply	other threads:[~2022-06-03 13:43 UTC|newest]

Thread overview: 84+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-03 13:42 [PATCH v5 0/9] mm/demotion: Memory tiers and demotion Aneesh Kumar K.V
2022-06-03 13:42 ` [PATCH v5 1/9] mm/demotion: Add support for explicit memory tiers Aneesh Kumar K.V
2022-06-07 18:43   ` Tim Chen
2022-06-07 20:18     ` Wei Xu
2022-06-08  4:30     ` Aneesh Kumar K V
2022-06-08  6:06       ` Ying Huang
2022-06-08  4:37     ` Aneesh Kumar K V
2022-06-08  6:10       ` Ying Huang
2022-06-08  8:04         ` Aneesh Kumar K V
2022-06-07 21:32   ` Yang Shi
2022-06-08  1:34     ` Ying Huang
2022-06-08 16:37       ` Yang Shi
2022-06-09  6:52         ` Ying Huang
2022-06-08  4:58     ` Aneesh Kumar K V
2022-06-08  6:18       ` Ying Huang
2022-06-08 16:42       ` Yang Shi
2022-06-09  8:17         ` Aneesh Kumar K V
2022-06-09 16:04           ` Yang Shi
2022-06-08 14:11   ` Johannes Weiner
2022-06-08 14:21     ` Aneesh Kumar K V
2022-06-08 15:55     ` Johannes Weiner
2022-06-08 16:13       ` Aneesh Kumar K V
2022-06-08 18:16         ` Johannes Weiner
2022-06-09  2:33           ` Aneesh Kumar K V
2022-06-09 13:55             ` Johannes Weiner
2022-06-09 14:22               ` Jonathan Cameron
2022-06-09 20:41                 ` Johannes Weiner
2022-06-10  6:15                   ` Ying Huang
2022-06-10  9:57                   ` Jonathan Cameron
2022-06-13 14:05                     ` Johannes Weiner
2022-06-13 14:23                       ` Aneesh Kumar K V
2022-06-13 15:50                         ` Johannes Weiner
2022-06-14  6:48                           ` Ying Huang
2022-06-14  8:01                           ` Aneesh Kumar K V
2022-06-14 18:56                             ` Johannes Weiner
2022-06-15  6:23                               ` Aneesh Kumar K V
2022-06-16  1:11                               ` Ying Huang
2022-06-16  3:45                                 ` Wei Xu
2022-06-16  4:47                                   ` Aneesh Kumar K V
2022-06-16  5:51                                     ` Ying Huang
2022-06-17 10:41                                 ` Jonathan Cameron
2022-06-20  1:54                                   ` Huang, Ying
2022-06-14 16:45                       ` Jonathan Cameron
2022-06-21  8:27                         ` Aneesh Kumar K V
2022-06-03 13:42 ` Aneesh Kumar K.V [this message]
2022-06-07 20:15   ` [PATCH v5 2/9] mm/demotion: Expose per node memory tier to sysfs Tim Chen
2022-06-08  4:55     ` Aneesh Kumar K V
2022-06-08  6:42       ` Ying Huang
2022-06-08 16:06       ` Tim Chen
2022-06-08 16:15         ` Aneesh Kumar K V
2022-06-03 13:42 ` [PATCH v5 3/9] mm/demotion: Move memory demotion related code Aneesh Kumar K.V
2022-06-06 13:39   ` Bharata B Rao
2022-06-03 13:42 ` [PATCH v5 4/9] mm/demotion: Build demotion targets based on explicit memory tiers Aneesh Kumar K.V
2022-06-07 22:51   ` Tim Chen
2022-06-08  5:02     ` Aneesh Kumar K V
2022-06-08  6:52     ` Ying Huang
2022-06-08  6:50   ` Ying Huang
2022-06-08  8:19     ` Aneesh Kumar K V
2022-06-08  8:00   ` Ying Huang
2022-06-03 13:42 ` [PATCH v5 5/9] mm/demotion/dax/kmem: Set node's memory tier to MEMORY_TIER_PMEM Aneesh Kumar K.V
2022-06-03 13:42 ` [PATCH v5 6/9] mm/demotion: Add support for removing node from demotion memory tiers Aneesh Kumar K.V
2022-06-07 23:40   ` Tim Chen
2022-06-08  6:59   ` Ying Huang
2022-06-08  8:20     ` Aneesh Kumar K V
2022-06-08  8:23       ` Ying Huang
2022-06-08  8:29         ` Aneesh Kumar K V
2022-06-08  8:34           ` Ying Huang
2022-06-03 13:42 ` [PATCH v5 7/9] mm/demotion: Demote pages according to allocation fallback order Aneesh Kumar K.V
2022-06-03 13:42 ` [PATCH v5 8/9] mm/demotion: Add documentation for memory tiering Aneesh Kumar K.V
2022-06-03 13:42 ` [PATCH v5 9/9] mm/demotion: Update node_is_toptier to work with memory tiers Aneesh Kumar K.V
2022-06-06  3:11   ` Ying Huang
2022-06-06  3:52     ` Aneesh Kumar K V
2022-06-06  7:24       ` Ying Huang
2022-06-06  8:33         ` Aneesh Kumar K V
2022-06-08  7:26           ` Ying Huang
2022-06-08  8:28             ` Aneesh Kumar K V
2022-06-08  8:32               ` Ying Huang
2022-06-08 14:37                 ` Aneesh Kumar K.V
2022-06-08 20:14                   ` Tim Chen
2022-06-10  6:04                   ` Ying Huang
2022-06-06  4:53 ` [PATCH] mm/demotion: Add sysfs ABI documentation Aneesh Kumar K.V
2022-06-08 13:57 ` [PATCH v5 0/9] mm/demotion: Memory tiers and demotion Johannes Weiner
2022-06-08 14:20   ` Aneesh Kumar K V
2022-06-09  8:53     ` Jonathan Cameron

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220603134237.131362-3-aneesh.kumar@linux.ibm.com \
    --to=aneesh.kumar@linux.ibm.com \
    --cc=Jonathan.Cameron@huawei.com \
    --cc=akpm@linux-foundation.org \
    --cc=apopple@nvidia.com \
    --cc=baolin.wang@linux.alibaba.com \
    --cc=brice.goglin@gmail.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.hansen@intel.com \
    --cc=dave@stgolabs.net \
    --cc=feng.tang@intel.com \
    --cc=gthelen@google.com \
    --cc=hesham.almatary@huawei.com \
    --cc=jvgediya@linux.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=rientjes@google.com \
    --cc=shy828301@gmail.com \
    --cc=tim.c.chen@intel.com \
    --cc=weixugc@google.com \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox