From: <zhongjinji@honor.com>
To: <linux-mm@kvack.org>
Cc: <akpm@linux-foundation.org>, <mhocko@suse.com>,
<rientjes@google.com>, <shakeel.butt@linux.dev>,
<npache@redhat.com>, <liulu.liu@honor.com>, <feng.han@honor.com>
Subject: [PATCH] mm: delay oom_reaper only for the process using robust-futex
Date: Thu, 31 Jul 2025 18:29:04 +0800 [thread overview]
Message-ID: <20250731102904.8615-1-zhongjinji@honor.com> (raw)
From: zhongjinji <zhongjinji@honor.com>
After the patch at
https://lore.kernel.org/all/20220414144042.677008-1-npache@redhat.com/T/#u
was merged, the oom_reaper has almost stopped working.
However, many processes do not use robust futexes, so they do not
access user-space memory during do_exit() and do not run into the problem
described in that patch.
Therefore, delay the oom_reaper only when the process uses robust futexes
(detected via task->robust_list or task->compat_robust_list) and queue it
without delay otherwise, letting the oom_reaper work properly in more cases.
Signed-off-by: zhongjinji <zhongjinji@honor.com>
---
mm/oom_kill.c | 41 ++++++++++++++++++++++++++++++-----------
1 file changed, 30 insertions(+), 11 deletions(-)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 25923cfec9c6..7e74dc0ac2a6 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -692,15 +692,18 @@ static void wake_oom_reaper(struct timer_list *timer)
* before the exit path is able to wake the futex waiters.
*/
#define OOM_REAPER_DELAY (2*HZ)
-static void queue_oom_reaper(struct task_struct *tsk)
+static void queue_oom_reaper(struct task_struct *tsk, bool may_access_user)
{
+ unsigned long reaper_delay = 0;
+
/* mm is already queued? */
if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
return;
-
+ if (may_access_user)
+ reaper_delay = OOM_REAPER_DELAY;
get_task_struct(tsk);
timer_setup(&tsk->oom_reaper_timer, wake_oom_reaper, 0);
- tsk->oom_reaper_timer.expires = jiffies + OOM_REAPER_DELAY;
+ tsk->oom_reaper_timer.expires = jiffies + reaper_delay;
add_timer(&tsk->oom_reaper_timer);
}
@@ -742,7 +745,7 @@ static int __init oom_init(void)
}
subsys_initcall(oom_init)
#else
-static inline void queue_oom_reaper(struct task_struct *tsk)
+static inline void queue_oom_reaper(struct task_struct *tsk, bool may_access_user)
{
}
#endif /* CONFIG_MMU */
@@ -864,6 +867,11 @@ static inline bool __task_will_free_mem(struct task_struct *task)
return false;
}
+static inline bool exit_may_access_user(struct task_struct *task)
+{
+ return task->robust_list || task->compat_robust_list;
+}
+
/*
* Checks whether the given task is dying or exiting and likely to
* release its address space. This means that all threads and processes
@@ -871,11 +879,12 @@ static inline bool __task_will_free_mem(struct task_struct *task)
* Caller has to make sure that task->mm is stable (hold task_lock or
* it operates on the current).
*/
-static bool task_will_free_mem(struct task_struct *task)
+static bool task_will_free_mem(struct task_struct *task, bool *may_access_user)
{
struct mm_struct *mm = task->mm;
struct task_struct *p;
bool ret = true;
+ bool access = false;
/*
* Skip tasks without mm because it might have passed its exit_mm and
@@ -888,6 +897,8 @@ static bool task_will_free_mem(struct task_struct *task)
if (!__task_will_free_mem(task))
return false;
+ access |= exit_may_access_user(task);
+
/*
* This task has already been drained by the oom reaper so there are
* only small chances it will free some more
@@ -912,8 +923,11 @@ static bool task_will_free_mem(struct task_struct *task)
ret = __task_will_free_mem(p);
if (!ret)
break;
+ access |= exit_may_access_user(p);
}
rcu_read_unlock();
+ if (may_access_user)
+ *may_access_user = access;
return ret;
}
@@ -923,6 +937,7 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
struct task_struct *p;
struct mm_struct *mm;
bool can_oom_reap = true;
+ bool may_access_user = false;
p = find_lock_task_mm(victim);
if (!p) {
@@ -950,6 +965,7 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
* reserves from the user space under its control.
*/
do_send_sig_info(SIGKILL, SEND_SIG_PRIV, victim, PIDTYPE_TGID);
+ may_access_user |= exit_may_access_user(victim);
mark_oom_victim(victim);
pr_err("%s: Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB, UID:%u pgtables:%lukB oom_score_adj:%hd\n",
message, task_pid_nr(victim), victim->comm, K(mm->total_vm),
@@ -990,11 +1006,12 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
if (unlikely(p->flags & PF_KTHREAD))
continue;
do_send_sig_info(SIGKILL, SEND_SIG_PRIV, p, PIDTYPE_TGID);
+ may_access_user |= exit_may_access_user(p);
}
rcu_read_unlock();
if (can_oom_reap)
- queue_oom_reaper(victim);
+ queue_oom_reaper(victim, may_access_user);
mmdrop(mm);
put_task_struct(victim);
@@ -1020,6 +1037,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
struct mem_cgroup *oom_group;
static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
+ bool may_access_user = false;
/*
* If the task is already exiting, don't alarm the sysadmin or kill
@@ -1027,9 +1045,9 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
* so it can die quickly
*/
task_lock(victim);
- if (task_will_free_mem(victim)) {
+ if (task_will_free_mem(victim, &may_access_user)) {
mark_oom_victim(victim);
- queue_oom_reaper(victim);
+ queue_oom_reaper(victim, may_access_user);
task_unlock(victim);
put_task_struct(victim);
return;
@@ -1112,6 +1130,7 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier);
bool out_of_memory(struct oom_control *oc)
{
unsigned long freed = 0;
+ bool may_access_user = false;
if (oom_killer_disabled)
return false;
@@ -1128,9 +1147,9 @@ bool out_of_memory(struct oom_control *oc)
* select it. The goal is to allow it to allocate so that it may
* quickly exit and free its memory.
*/
- if (task_will_free_mem(current)) {
+ if (task_will_free_mem(current, &may_access_user)) {
mark_oom_victim(current);
- queue_oom_reaper(current);
+ queue_oom_reaper(current, may_access_user);
return true;
}
@@ -1231,7 +1250,7 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
mm = p->mm;
mmgrab(mm);
- if (task_will_free_mem(p))
+ if (task_will_free_mem(p, NULL))
reap = true;
else {
/* Error only if the work has not been done already */
--
2.17.1
next reply other threads:[~2025-07-31 10:29 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-07-31 10:29 zhongjinji [this message]
2025-07-31 16:54 ` Shakeel Butt
2025-08-01 13:20 ` zhongjinji
2025-07-31 22:28 ` kernel test robot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250731102904.8615-1-zhongjinji@honor.com \
--to=zhongjinji@honor.com \
--cc=akpm@linux-foundation.org \
--cc=feng.han@honor.com \
--cc=linux-mm@kvack.org \
--cc=liulu.liu@honor.com \
--cc=mhocko@suse.com \
--cc=npache@redhat.com \
--cc=rientjes@google.com \
--cc=shakeel.butt@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox