linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: <gutierrez.asier@huawei-partners.com>
To: <akpm@linux-foundation.org>, <david@redhat.com>,
	<ryan.roberts@arm.com>, <baohua@kernel.org>,
	<willy@infradead.org>, <peterx@redhat.com>, <hannes@cmpxchg.org>,
	<hocko@kernel.org>, <roman.gushchin@linux.dev>,
	<shakeel.butt@linux.dev>, <muchun.song@linux.dev>
Cc: <cgroups@vger.kernel.org>, <linux-mm@kvack.org>,
	<linux-kernel@vger.kernel.org>, <stepanov.anatoly@huawei.com>,
	<alexander.kozhevnikov@huawei-partners.com>,
	<guohanjun@huawei.com>, <weiyongjun1@huawei.com>,
	<wangkefeng.wang@huawei.com>, <judy.chenhui@huawei.com>,
	<yusongping@huawei.com>, <artem.kuzin@huawei.com>,
	<kang.sun@huawei.com>
Subject: [RFC PATCH 3/3] mm: Add thp_defrag control for cgroup
Date: Wed, 30 Oct 2024 16:33:11 +0800	[thread overview]
Message-ID: <20241030083311.965933-4-gutierrez.asier@huawei-partners.com> (raw)
In-Reply-To: <20241030083311.965933-1-gutierrez.asier@huawei-partners.com>

From: Asier Gutierrez <gutierrez.asier@huawei-partners.com>

This patch exposes a new file in memory cgroups: memory.thp_defrag, which
follows the /sys/kernel/mm/transparent_hugepage/defrag style. Support for
the different THP defrag policies in memory cgroups is also added.

Signed-off-by: Asier Gutierrez <gutierrez.asier@huawei-partners.com>
Signed-off-by: Anatoly Stepanov <stepanov.anatoly@huawei.com>
Reviewed-by: Alexander Kozhevnikov <alexander.kozhevnikov@huawei-partners.com>

---
 include/linux/huge_mm.h    |   8 +++
 include/linux/memcontrol.h |   4 +-
 mm/huge_memory.c           | 116 ++++++++++++++++++++++---------------
 mm/memcontrol.c            |  31 ++++++++++
 4 files changed, 112 insertions(+), 47 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index f99ac9b7e5bc..177c7d3578ed 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -56,6 +56,12 @@ enum transparent_hugepage_flag {
 #define HUGEPAGE_FLAGS_ENABLED_MASK ((1UL << TRANSPARENT_HUGEPAGE_FLAG) |\
 				(1UL << TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))
 
+#define HUGEPAGE_FLAGS_DEFRAG_MASK ((1UL << TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG) |\
+				(1UL << TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG) |\
+				(1UL << TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG) |\
+				(1UL << TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG) |\
+				(1UL << TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG))
+
 struct kobject;
 struct kobj_attribute;
 
@@ -442,7 +448,9 @@ bool unmap_huge_pmd_locked(struct vm_area_struct *vma, unsigned long addr,
 			   pmd_t *pmdp, struct folio *folio);
 
 int thp_enabled_parse(const char *buf, unsigned long *flags);
+int thp_defrag_parse(const char *buf, unsigned long *flags);
 const char *thp_enabled_string(unsigned long flags);
+const char *thp_defrag_string(unsigned long flags);
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 static inline bool folio_test_pmd_mappable(struct folio *folio)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d78318782af8..a0edf15b3a07 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1634,9 +1634,11 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
 void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 int memory_thp_enabled_show(struct seq_file *m, void *v);
+int memory_thp_defrag_show(struct seq_file *m, void *v);
 ssize_t memory_thp_enabled_write(struct kernfs_open_file *of, char *buf,
 			      size_t nbytes, loff_t off);
-
+ssize_t memory_thp_defrag_write(struct kernfs_open_file *of, char *buf,
+			      size_t nbytes, loff_t off);
 int mem_cgroup_thp_flags_update_all(unsigned long flags, unsigned long mask);
 unsigned long memcg_get_thp_flags_all(unsigned long mask);
 unsigned long memcg_get_thp_flags(struct vm_area_struct *vma);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index fdffdfc8605c..6e1886b220d9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -311,6 +311,28 @@ const char *thp_enabled_string(unsigned long flags)
 	return output;
 }
 
+const char *thp_defrag_string(unsigned long flags)
+{
+	const char *output;
+
+	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
+		     &flags))
+		output = "[always] defer defer+madvise madvise never";
+	else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG,
+			  &flags))
+		output = "always [defer] defer+madvise madvise never";
+	else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG,
+			  &flags))
+		output = "always defer [defer+madvise] madvise never";
+	else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
+			  &flags))
+		output = "always defer defer+madvise [madvise] never";
+	else
+		output = "always defer defer+madvise madvise [never]";
+
+	return output;
+}
+
 int thp_enabled_parse(const char *buf, unsigned long *flags)
 {
 	if (sysfs_streq(buf, "always")) {
@@ -328,6 +350,39 @@ int thp_enabled_parse(const char *buf, unsigned long *flags)
 	return 0;
 }
 
+int thp_defrag_parse(const char *buf, unsigned long *flags)
+{
+	if (sysfs_streq(buf, "always")) {
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, flags);
+		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, flags);
+	} else if (sysfs_streq(buf, "defer+madvise")) {
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, flags);
+		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, flags);
+	} else if (sysfs_streq(buf, "defer")) {
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, flags);
+		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, flags);
+	} else if (sysfs_streq(buf, "madvise")) {
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, flags);
+		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, flags);
+	} else if (sysfs_streq(buf, "never")) {
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, flags);
+	} else
+		return -EINVAL;
+
+	return 0;
+}
+
 #ifdef CONFIG_SYSFS
 static ssize_t enabled_show(struct kobject *kobj,
 			    struct kobj_attribute *attr, char *buf)
@@ -394,60 +449,29 @@ ssize_t single_hugepage_flag_store(struct kobject *kobj,
 static ssize_t defrag_show(struct kobject *kobj,
 			   struct kobj_attribute *attr, char *buf)
 {
-	const char *output;
-
-	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
-		     &transparent_hugepage_flags))
-		output = "[always] defer defer+madvise madvise never";
-	else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG,
-			  &transparent_hugepage_flags))
-		output = "always [defer] defer+madvise madvise never";
-	else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG,
-			  &transparent_hugepage_flags))
-		output = "always defer [defer+madvise] madvise never";
-	else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
-			  &transparent_hugepage_flags))
-		output = "always defer defer+madvise [madvise] never";
-	else
-		output = "always defer defer+madvise madvise [never]";
-
-	return sysfs_emit(buf, "%s\n", output);
+	unsigned long flags = transparent_hugepage_flags;
+	return sysfs_emit(buf, "%s\n", thp_defrag_string(flags));
 }
 
 static ssize_t defrag_store(struct kobject *kobj,
 			    struct kobj_attribute *attr,
 			    const char *buf, size_t count)
 {
-	if (sysfs_streq(buf, "always")) {
-		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
-		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
-		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
-		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
-	} else if (sysfs_streq(buf, "defer+madvise")) {
-		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
-		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
-		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
-		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
-	} else if (sysfs_streq(buf, "defer")) {
-		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
-		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
-		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
-		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
-	} else if (sysfs_streq(buf, "madvise")) {
-		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
-		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
-		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
-		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
-	} else if (sysfs_streq(buf, "never")) {
-		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
-		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
-		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
-		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
-	} else
-		return -EINVAL;
+	ssize_t ret = count;
+	int err;
 
-	return count;
+	ret = thp_defrag_parse(buf, &transparent_hugepage_flags) ? : count;
+	if (ret > 0 && IS_ENABLED(CONFIG_MEMCG) &&
+			!mem_cgroup_disabled()) {
+		err = mem_cgroup_thp_flags_update_all(transparent_hugepage_flags,
+							HUGEPAGE_FLAGS_DEFRAG_MASK);
+		if (err)
+			ret = err;
+	}
+
+	return ret;
 }
+
 static struct kobj_attribute defrag_attr = __ATTR_RW(defrag);
 
 static ssize_t use_zero_page_show(struct kobject *kobj,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 938e6894c0b3..53384f0a69af 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3706,6 +3706,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE
 		(1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)|
 #endif
+		(1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG)|
+		(1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG)|
 		(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG));
 		WRITE_ONCE(memcg->thp_anon_orders_inherit, BIT(PMD_ORDER));
 #endif
@@ -4490,6 +4492,30 @@ ssize_t memory_thp_enabled_write(struct kernfs_open_file *of, char *buf,
 	mutex_unlock(&memcg_thp_flags_mutex);
 	return ret;
 }
+
+int memory_thp_defrag_show(struct seq_file *m, void *v)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
+	unsigned long flags = READ_ONCE(memcg->thp_flags);
+
+	seq_printf(m, "%s\n", thp_defrag_string(flags));
+	return 0;
+}
+
+ssize_t memory_thp_defrag_write(struct kernfs_open_file *of, char *buf,
+			      size_t nbytes, loff_t off)
+{
+	int ret = nbytes;
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+
+	buf = strstrip(buf);
+
+	mutex_lock(&memcg_thp_flags_mutex);
+	ret = thp_defrag_parse(buf, &memcg->thp_flags) ? : nbytes;
+	mutex_unlock(&memcg_thp_flags_mutex);
+
+	return ret;
+}
 #endif
 
 static struct cftype memory_files[] = {
@@ -4566,6 +4592,11 @@ static struct cftype memory_files[] = {
 		.seq_show = memory_thp_enabled_show,
 		.write = memory_thp_enabled_write,
 	},
+	{
+		.name = "thp_defrag",
+		.seq_show = memory_thp_defrag_show,
+		.write = memory_thp_defrag_write,
+	},
 #endif
 	{ }	/* terminate */
 };
-- 
2.34.1



  parent reply	other threads:[~2024-10-30  8:33 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-30  8:33 [RFC PATCH 0/3] Cgroup-based THP control gutierrez.asier
2024-10-30  8:33 ` [RFC PATCH 1/3] mm: Add thp_flags control for cgroup gutierrez.asier
2024-10-30  8:33 ` [RFC PATCH 2/3] mm: Support for huge pages in cgroups gutierrez.asier
2024-10-30  8:33 ` gutierrez.asier [this message]
2024-10-30  8:38 ` [RFC PATCH 0/3] Cgroup-based THP control Michal Hocko
2024-10-30 12:51   ` Gutierrez Asier
2024-10-30 13:27     ` Michal Hocko
2024-10-30 14:58       ` Gutierrez Asier
2024-10-30 15:15         ` Michal Hocko
2024-10-31  6:06           ` Stepanov Anatoly
2024-10-31  8:33             ` Michal Hocko
2024-10-31 14:37               ` Stepanov Anatoly
2024-11-01  7:35                 ` Michal Hocko
2024-11-01 11:54                   ` Stepanov Anatoly
2024-11-01 13:15                     ` Michal Hocko
2024-11-01 13:24                       ` Stepanov Anatoly
2024-11-01 13:28                         ` Michal Hocko
2024-11-01 13:39                           ` Stepanov Anatoly
2024-11-01 13:50                             ` Michal Hocko
2024-11-01 14:03                               ` Stepanov Anatoly
2024-11-01 16:01                 ` Matthew Wilcox
2024-10-30 13:14 ` Matthew Wilcox
2024-10-30 13:16   ` David Hildenbrand
2024-10-30 14:45 ` Chris Down
2024-10-30 15:04   ` Michal Hocko
2024-10-30 15:08 ` Johannes Weiner
2024-11-01 12:44   ` Stepanov Anatoly

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241030083311.965933-4-gutierrez.asier@huawei-partners.com \
    --to=gutierrez.asier@huawei-partners.com \
    --cc=akpm@linux-foundation.org \
    --cc=alexander.kozhevnikov@huawei-partners.com \
    --cc=artem.kuzin@huawei.com \
    --cc=baohua@kernel.org \
    --cc=cgroups@vger.kernel.org \
    --cc=david@redhat.com \
    --cc=guohanjun@huawei.com \
    --cc=hannes@cmpxchg.org \
    --cc=hocko@kernel.org \
    --cc=judy.chenhui@huawei.com \
    --cc=kang.sun@huawei.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=muchun.song@linux.dev \
    --cc=peterx@redhat.com \
    --cc=roman.gushchin@linux.dev \
    --cc=ryan.roberts@arm.com \
    --cc=shakeel.butt@linux.dev \
    --cc=stepanov.anatoly@huawei.com \
    --cc=wangkefeng.wang@huawei.com \
    --cc=weiyongjun1@huawei.com \
    --cc=willy@infradead.org \
    --cc=yusongping@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox