linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: hezhongkun <hezhongkun.hzk@bytedance.com>
To: hannes@cmpxchg.org, mhocko@kernel.org, roman.gushchin@linux.dev
Cc: linux-kernel@vger.kernel.org, cgroups@vger.kernel.org,
	linux-mm@kvack.org, lizefan.x@bytedance.com,
	Hezhongkun <hezhongkun.hzk@bytedance.com>
Subject: [PATCH] mm: memcontrol: add the mempolicy interface for cgroup v2.
Date: Tue, 24 May 2022 18:36:38 +0800	[thread overview]
Message-ID: <20220524103638.473-1-hezhongkun.hzk@bytedance.com> (raw)

From: Hezhongkun <hezhongkun.hzk@bytedance.com>

Mempolicy is difficult to use because it can only be set from
within the process itself, via a system call. We want to make
mempolicy easier to use with cgroups, so that low-priority cgroups
can be restricted to allocating memory from specified nodes.
This patch therefore adds a mempolicy interface to the memory cgroup.

The mempolicy of the memcgroup takes priority over that of the
task. The policy is looked up in the following order:
memcgroup->policy, then task->policy or the vma policy, then the
default policy. A memcgroup's policy belongs to that memcgroup
alone, so descendants do not inherit it.

Signed-off-by: Hezhongkun <hezhongkun.hzk@bytedance.com>
---
 include/linux/memcontrol.h |  1 +
 mm/memcontrol.c            | 42 ++++++++++++++++++++++++++++++++++++++
 mm/mempolicy.c             | 30 ++++++++++++++++++++++-----
 3 files changed, 68 insertions(+), 5 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 89b14729d59f..2261eeb6100c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -343,6 +343,7 @@ struct mem_cgroup {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	struct deferred_split deferred_split_queue;
 #endif
+	struct mempolicy *mempolicy;
 
 	struct mem_cgroup_per_node *nodeinfo[];
 };
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 598fece89e2b..38108fd4df64 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6332,6 +6332,42 @@ static int memory_numa_stat_show(struct seq_file *m, void *v)
 
 	return 0;
 }
+
+/* Print the memcg's mempolicy as text; prints nothing if unset or default. */
+static int memory_policy_show(struct seq_file *m, void *v)
+{
+	char buffer[64] = "";
+	struct mempolicy *mpol = mem_cgroup_from_seq(m)->mempolicy;
+
+	if (!mpol || mpol->mode == MPOL_DEFAULT)
+		return 0;
+
+	mpol_to_str(buffer, sizeof(buffer), mpol);
+	/* The policy text is data: never pass it to seq_printf() as a format. */
+	seq_printf(m, "%s", buffer);
+	seq_putc(m, '\n');
+	return 0;
+}
+
+/* Set the memcg policy from @buf; returns @nbytes or -EINVAL on bad input. */
+static ssize_t memory_policy_write(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
+{
+	struct mempolicy *mpol, *old;
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+
+	buf = strstrip(buf);
+	/* Report a malformed policy instead of silently accepting the write. */
+	if (mpol_parse_str(buf, &mpol))
+		return -EINVAL;
+
+	/* NOTE(review): readers of ->mempolicy are not synchronized with this. */
+	old = memcg->mempolicy;
+	memcg->mempolicy = mpol;
+	mpol_put(old);
+	return nbytes;
+}
+
 #endif
 
 static int memory_oom_group_show(struct seq_file *m, void *v)
@@ -6416,6 +6452,12 @@ static struct cftype memory_files[] = {
 		.name = "numa_stat",
 		.seq_show = memory_numa_stat_show,
 	},
+	{
+		.name = "policy",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = memory_policy_show,
+		.write = memory_policy_write,
+	},
 #endif
 	{
 		.name = "oom.group",
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 8c74107a2b15..5153b046f8c3 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -176,6 +176,29 @@ struct mempolicy *get_task_policy(struct task_struct *p)
 	return &default_policy;
 }
 
+/*
+ * Return the mempolicy of @p's memory cgroup if one is set, otherwise fall
+ * back to get_task_policy(@p).
+ *
+ * NOTE(review): the memcg policy is returned without taking a reference, so
+ * it is presumably only safe while memory_policy_write() cannot replace it
+ * concurrently - confirm the intended lifetime rules.
+ */
+struct mempolicy *get_cgrp_or_task_policy(struct task_struct *p)
+{
+	struct mem_cgroup *memcg;
+	struct mempolicy *pol = NULL;
+
+	/* mem_cgroup_from_task() dereferences RCU-protected task state. */
+	rcu_read_lock();
+	memcg = mem_cgroup_from_task(p);
+	if (memcg)
+		pol = READ_ONCE(memcg->mempolicy);
+	rcu_read_unlock();
+
+	return pol ? pol : get_task_policy(p);
+}
+
 static const struct mempolicy_operations {
 	int (*create)(struct mempolicy *pol, const nodemask_t *nodes);
 	void (*rebind)(struct mempolicy *pol, const nodemask_t *nodes);
@@ -1782,6 +1792,30 @@ static struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
 	return pol;
 }
 
+/*
+ * Return the mempolicy of current's memory cgroup if one is set, otherwise
+ * fall back to get_vma_policy(@vma, @addr).
+ *
+ * NOTE(review): the memcg policy is returned without taking a reference, so
+ * it is presumably only safe while memory_policy_write() cannot replace it
+ * concurrently - confirm the intended lifetime rules.
+ */
+static struct mempolicy *get_cgrp_or_vma_policy(struct vm_area_struct *vma,
+						unsigned long addr)
+{
+	struct mem_cgroup *memcg;
+	struct mempolicy *pol = NULL;
+
+	/* mem_cgroup_from_task() dereferences RCU-protected task state. */
+	rcu_read_lock();
+	memcg = mem_cgroup_from_task(current);
+	if (memcg)
+		pol = READ_ONCE(memcg->mempolicy);
+	rcu_read_unlock();
+
+	return pol ? pol : get_vma_policy(vma, addr);
+}
+
 bool vma_policy_mof(struct vm_area_struct *vma)
 {
 	struct mempolicy *pol;
@@ -1896,7 +1916,7 @@ unsigned int mempolicy_slab_node(void)
 	if (!in_task())
 		return node;
 
-	policy = current->mempolicy;
+	policy = get_cgrp_or_task_policy(current);
 	if (!policy)
 		return node;
 
@@ -2005,7 +2025,7 @@ int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags,
 	int nid;
 	int mode;
 
-	*mpol = get_vma_policy(vma, addr);
+	*mpol = get_cgrp_or_vma_policy(vma, addr);
 	*nodemask = NULL;
 	mode = (*mpol)->mode;
 
@@ -2158,7 +2178,7 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 	int preferred_nid;
 	nodemask_t *nmask;
 
-	pol = get_vma_policy(vma, addr);
+	pol = get_cgrp_or_vma_policy(vma, addr);
 
 	if (pol->mode == MPOL_INTERLEAVE) {
 		unsigned nid;
@@ -2257,7 +2277,7 @@ struct page *alloc_pages(gfp_t gfp, unsigned order)
 	struct page *page;
 
 	if (!in_interrupt() && !(gfp & __GFP_THISNODE))
-		pol = get_task_policy(current);
+		pol = get_cgrp_or_task_policy(current);
 
 	/*
 	 * No reference counting needed for current->mempolicy
@@ -2562,7 +2582,7 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
 	int polnid = NUMA_NO_NODE;
 	int ret = NUMA_NO_NODE;
 
-	pol = get_vma_policy(vma, addr);
+	pol = get_cgrp_or_vma_policy(vma, addr);
 	if (!(pol->flags & MPOL_F_MOF))
 		goto out;
 
-- 
2.17.1



             reply	other threads:[~2022-05-24 10:36 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-24 10:36 hezhongkun [this message]
2022-05-24 10:47 ` Michal Hocko
2022-05-24 11:46   ` [External] " 贺中坤
2022-05-24 12:04     ` Michal Hocko
2022-05-24 13:10 ` kernel test robot
2022-05-24 15:02 ` kernel test robot
2022-05-24 15:12 ` kernel test robot
2022-05-25  7:56 ` [mm] 6adb0a02c2: WARNING:suspicious_RCU_usage kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220524103638.473-1-hezhongkun.hzk@bytedance.com \
    --to=hezhongkun.hzk@bytedance.com \
    --cc=cgroups@vger.kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lizefan.x@bytedance.com \
    --cc=mhocko@kernel.org \
    --cc=roman.gushchin@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox