From: Yafang Shao <laoar.shao@gmail.com>
To: akpm@linux-foundation.org
Cc: linux-mm@kvack.org, Yafang Shao <laoar.shao@gmail.com>,
Michal Hocko <mhocko@suse.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Vladimir Davydov <vdavydov.dev@gmail.com>,
Roman Gushchin <guro@fb.com>
Subject: [PATCH] mm, memcg: introduce per memcg oom_score_adj
Date: Thu, 22 Aug 2019 04:56:29 -0400 [thread overview]
Message-ID: <1566464189-1631-1-git-send-email-laoar.shao@gmail.com> (raw)
- Why we need a per memcg oom_score_adj setting ?
This is easy to deploy and very convenient for containers.
When we use containers, we always treat a memcg as a whole. With a per-memcg
oom_score_adj setting, we no longer need to configure it process by process;
setting it individually on every process in a memcg is exhausting for users.
In this patch, a file named memory.oom.score_adj is introduced.
The valid value of it is from -1000 to +1000, which is the same range as the
process-level oom_score_adj.
When OOM is invoked, the effective oom_score_adj is calculated as below:
effective oom_score_adj = original oom_score_adj + memory.oom.score_adj
The valid effective value is also from -1000 to +1000.
This is something like a hook to re-calculate the oom_score_adj.
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Roman Gushchin <guro@fb.com>
---
include/linux/memcontrol.h | 24 ++++++++++++++++++++++++
mm/memcontrol.c | 38 ++++++++++++++++++++++++++++++++++++++
mm/oom_kill.c | 20 ++++++++------------
3 files changed, 70 insertions(+), 12 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 2cd4359..d2dbde5 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -21,6 +21,7 @@
#include <linux/vmstat.h>
#include <linux/writeback.h>
#include <linux/page-flags.h>
+#include <linux/oom.h>
struct mem_cgroup;
struct page;
@@ -224,6 +225,7 @@ struct mem_cgroup {
* Should the OOM killer kill all belonging tasks, had it kill one?
*/
bool oom_group;
+ short oom_score_adj;
/* protected by memcg_oom_lock */
bool oom_lock;
@@ -538,6 +540,23 @@ static inline bool task_in_memcg_oom(struct task_struct *p)
return p->memcg_in_oom;
}
+static inline int mem_cgroup_score_adj(struct task_struct *p, int task_adj)
+{
+ struct mem_cgroup *memcg;
+ int adj = task_adj;
+
+ memcg = mem_cgroup_from_task(p);
+ if (memcg != root_mem_cgroup) {
+ adj += memcg->oom_score_adj;
+ if (adj < OOM_SCORE_ADJ_MIN)
+ adj = OOM_SCORE_ADJ_MIN;
+ else if (adj > OOM_SCORE_ADJ_MAX)
+ adj = OOM_SCORE_ADJ_MAX;
+ }
+
+ return adj;
+}
+
bool mem_cgroup_oom_synchronize(bool wait);
struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim,
struct mem_cgroup *oom_domain);
@@ -987,6 +1006,11 @@ static inline bool task_in_memcg_oom(struct task_struct *p)
return false;
}
+static inline int mem_cgroup_score_adj(struct task_struct *p, int task_adj)
+{
+ return task_adj;
+}
+
static inline bool mem_cgroup_oom_synchronize(bool wait)
{
return false;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6f5c0c5..065285c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5856,6 +5856,38 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
return nbytes;
}
+static int memory_oom_score_adj_show(struct seq_file *m, void *v)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
+
+ seq_printf(m, "%d\n", memcg->oom_score_adj);
+
+ return 0;
+}
+
+static ssize_t memory_oom_score_adj_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+ int oom_score_adj;
+ int ret;
+
+ buf = strstrip(buf);
+ if (!buf)
+ return -EINVAL;
+
+ ret = kstrtoint(buf, 0, &oom_score_adj);
+ if (ret)
+ return ret;
+
+ if (oom_score_adj > 1000 || oom_score_adj < -1000)
+ return -EINVAL;
+
+ memcg->oom_score_adj = oom_score_adj;
+
+ return nbytes;
+}
+
static struct cftype memory_files[] = {
{
.name = "current",
@@ -5909,6 +5941,12 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
.seq_show = memory_oom_group_show,
.write = memory_oom_group_write,
},
+ {
+ .name = "oom.score_adj",
+ .flags = CFTYPE_NOT_ON_ROOT | CFTYPE_NS_DELEGATABLE,
+ .seq_show = memory_oom_score_adj_show,
+ .write = memory_oom_score_adj_write,
+ },
{ } /* terminate */
};
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index eda2e2a..f3b0276 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -212,13 +212,7 @@ unsigned long oom_badness(struct task_struct *p, unsigned long totalpages)
* unkillable or have been already oom reaped or the are in
* the middle of vfork
*/
- adj = (long)p->signal->oom_score_adj;
- if (adj == OOM_SCORE_ADJ_MIN ||
- test_bit(MMF_OOM_SKIP, &p->mm->flags) ||
- in_vfork(p)) {
- task_unlock(p);
- return 0;
- }
+ adj = mem_cgroup_score_adj(p, p->signal->oom_score_adj);
/*
* The baseline for the badness score is the proportion of RAM that each
@@ -404,7 +398,8 @@ static int dump_task(struct task_struct *p, void *arg)
task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
mm_pgtables_bytes(task->mm),
get_mm_counter(task->mm, MM_SWAPENTS),
- task->signal->oom_score_adj, task->comm);
+ mem_cgroup_score_adj(task, task->signal->oom_score_adj),
+ task->comm);
task_unlock(task);
return 0;
@@ -453,7 +448,7 @@ static void dump_header(struct oom_control *oc, struct task_struct *p)
{
pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, oom_score_adj=%hd\n",
current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order,
- current->signal->oom_score_adj);
+ mem_cgroup_score_adj(current, current->signal->oom_score_adj));
if (!IS_ENABLED(CONFIG_COMPACTION) && oc->order)
pr_warn("COMPACTION is disabled!!!\n");
@@ -939,8 +934,8 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
*/
static int oom_kill_memcg_member(struct task_struct *task, void *message)
{
- if (task->signal->oom_score_adj != OOM_SCORE_ADJ_MIN &&
- !is_global_init(task)) {
+ if (mem_cgroup_score_adj(task, task->signal->oom_score_adj) !=
+ OOM_SCORE_ADJ_MIN && !is_global_init(task)) {
get_task_struct(task);
__oom_kill_process(task, message);
}
@@ -1085,7 +1080,8 @@ bool out_of_memory(struct oom_control *oc)
if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task &&
current->mm && !oom_unkillable_task(current) &&
oom_cpuset_eligible(current, oc) &&
- current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
+ mem_cgroup_score_adj(current, current->signal->oom_score_adj) !=
+ OOM_SCORE_ADJ_MIN) {
get_task_struct(current);
oc->chosen = current;
oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)");
--
1.8.3.1
next reply other threads:[~2019-08-22 8:56 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-08-22 8:56 Yafang Shao [this message]
2019-08-22 9:19 ` Michal Hocko
2019-08-22 9:34 ` Yafang Shao
2019-08-22 10:59 ` Michal Hocko
2019-08-22 22:46 ` Roman Gushchin
2019-08-23 1:26 ` Yafang Shao
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1566464189-1631-1-git-send-email-laoar.shao@gmail.com \
--to=laoar.shao@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=guro@fb.com \
--cc=hannes@cmpxchg.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@suse.com \
--cc=vdavydov.dev@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox