linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: <zhongjinji@honor.com>
To: <linux-mm@kvack.org>
Cc: <akpm@linux-foundation.org>, <mhocko@suse.com>,
	<rientjes@google.com>, <shakeel.butt@linux.dev>,
	<npache@redhat.com>, <liulu.liu@honor.com>, <feng.han@honor.com>
Subject: [PATCH] mm: delay oom_reaper only for the process using robust-futex
Date: Thu, 31 Jul 2025 18:29:04 +0800	[thread overview]
Message-ID: <20250731102904.8615-1-zhongjinji@honor.com> (raw)

From: zhongjinji <zhongjinji@honor.com>

Since the patch at
https://lore.kernel.org/all/20220414144042.677008-1-npache@redhat.com/T/#u
was merged, the oom_reaper is delayed by OOM_REAPER_DELAY (2*HZ) for
every victim and, as a result, has almost stopped working.

However, many processes do not use robust futexes, so they do not
access user-space memory during do_exit() and do not run into the
problem described in that patch.

Therefore, delay the oom_reaper only when the victim, or a process
sharing its mm, has a robust futex list registered. In all other cases
the oom_reaper is queued without delay, so it can work properly again.

Signed-off-by: zhongjinji <zhongjinji@honor.com>
---
 mm/oom_kill.c | 41 ++++++++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 11 deletions(-)

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 25923cfec9c6..7e74dc0ac2a6 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -692,15 +692,18 @@ static void wake_oom_reaper(struct timer_list *timer)
  * before the exit path is able to wake the futex waiters.
  */
 #define OOM_REAPER_DELAY (2*HZ)
-static void queue_oom_reaper(struct task_struct *tsk)
+static void queue_oom_reaper(struct task_struct *tsk, bool may_access_user)
 {
+	unsigned long reaper_delay = 0;
+
 	/* mm is already queued? */
 	if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
 		return;
-
+	if (may_access_user)
+		reaper_delay = OOM_REAPER_DELAY;
 	get_task_struct(tsk);
 	timer_setup(&tsk->oom_reaper_timer, wake_oom_reaper, 0);
-	tsk->oom_reaper_timer.expires = jiffies + OOM_REAPER_DELAY;
+	tsk->oom_reaper_timer.expires = jiffies + reaper_delay;
 	add_timer(&tsk->oom_reaper_timer);
 }
 
@@ -742,7 +745,7 @@ static int __init oom_init(void)
 }
 subsys_initcall(oom_init)
 #else
-static inline void queue_oom_reaper(struct task_struct *tsk)
+static inline void queue_oom_reaper(struct task_struct *tsk, bool may_access_user)
 {
 }
 #endif /* CONFIG_MMU */
@@ -864,6 +867,11 @@ static inline bool __task_will_free_mem(struct task_struct *task)
 	return false;
 }
 
+static inline bool exit_may_access_user(struct task_struct *task)
+{
+	return task->robust_list || task->compat_robust_list;
+}
+
 /*
  * Checks whether the given task is dying or exiting and likely to
  * release its address space. This means that all threads and processes
@@ -871,11 +879,12 @@ static inline bool __task_will_free_mem(struct task_struct *task)
  * Caller has to make sure that task->mm is stable (hold task_lock or
  * it operates on the current).
  */
-static bool task_will_free_mem(struct task_struct *task)
+static bool task_will_free_mem(struct task_struct *task, bool *may_access_user)
 {
 	struct mm_struct *mm = task->mm;
 	struct task_struct *p;
 	bool ret = true;
+	bool access = false;
 
 	/*
 	 * Skip tasks without mm because it might have passed its exit_mm and
@@ -888,6 +897,8 @@ static bool task_will_free_mem(struct task_struct *task)
 	if (!__task_will_free_mem(task))
 		return false;
 
+	access |= exit_may_access_user(task);
+
 	/*
 	 * This task has already been drained by the oom reaper so there are
 	 * only small chances it will free some more
@@ -912,8 +923,11 @@ static bool task_will_free_mem(struct task_struct *task)
 		ret = __task_will_free_mem(p);
 		if (!ret)
 			break;
+		access |= exit_may_access_user(p);
 	}
 	rcu_read_unlock();
+	if (may_access_user)
+		*may_access_user = access;
 
 	return ret;
 }
@@ -923,6 +937,7 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
 	struct task_struct *p;
 	struct mm_struct *mm;
 	bool can_oom_reap = true;
+	bool may_access_user = false;
 
 	p = find_lock_task_mm(victim);
 	if (!p) {
@@ -950,6 +965,7 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
 	 * reserves from the user space under its control.
 	 */
 	do_send_sig_info(SIGKILL, SEND_SIG_PRIV, victim, PIDTYPE_TGID);
+	may_access_user |= exit_may_access_user(victim);
 	mark_oom_victim(victim);
 	pr_err("%s: Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB, UID:%u pgtables:%lukB oom_score_adj:%hd\n",
 		message, task_pid_nr(victim), victim->comm, K(mm->total_vm),
@@ -990,11 +1006,12 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
 		if (unlikely(p->flags & PF_KTHREAD))
 			continue;
 		do_send_sig_info(SIGKILL, SEND_SIG_PRIV, p, PIDTYPE_TGID);
+		may_access_user |= exit_may_access_user(p);
 	}
 	rcu_read_unlock();
 
 	if (can_oom_reap)
-		queue_oom_reaper(victim);
+		queue_oom_reaper(victim, may_access_user);
 
 	mmdrop(mm);
 	put_task_struct(victim);
@@ -1020,6 +1037,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 	struct mem_cgroup *oom_group;
 	static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
 					      DEFAULT_RATELIMIT_BURST);
+	bool may_access_user = false;
 
 	/*
 	 * If the task is already exiting, don't alarm the sysadmin or kill
@@ -1027,9 +1045,9 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 	 * so it can die quickly
 	 */
 	task_lock(victim);
-	if (task_will_free_mem(victim)) {
+	if (task_will_free_mem(victim, &may_access_user)) {
 		mark_oom_victim(victim);
-		queue_oom_reaper(victim);
+		queue_oom_reaper(victim, may_access_user);
 		task_unlock(victim);
 		put_task_struct(victim);
 		return;
@@ -1112,6 +1130,7 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier);
 bool out_of_memory(struct oom_control *oc)
 {
 	unsigned long freed = 0;
+	bool may_access_user = false;
 
 	if (oom_killer_disabled)
 		return false;
@@ -1128,9 +1147,9 @@ bool out_of_memory(struct oom_control *oc)
 	 * select it.  The goal is to allow it to allocate so that it may
 	 * quickly exit and free its memory.
 	 */
-	if (task_will_free_mem(current)) {
+	if (task_will_free_mem(current, &may_access_user)) {
 		mark_oom_victim(current);
-		queue_oom_reaper(current);
+		queue_oom_reaper(current, may_access_user);
 		return true;
 	}
 
@@ -1231,7 +1250,7 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
 	mm = p->mm;
 	mmgrab(mm);
 
-	if (task_will_free_mem(p))
+	if (task_will_free_mem(p, NULL))
 		reap = true;
 	else {
 		/* Error only if the work has not been done already */
-- 
2.17.1



             reply	other threads:[~2025-07-31 10:29 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-07-31 10:29 zhongjinji [this message]
2025-07-31 16:54 ` Shakeel Butt
2025-08-01 13:20   ` zhongjinji
2025-07-31 22:28 ` kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250731102904.8615-1-zhongjinji@honor.com \
    --to=zhongjinji@honor.com \
    --cc=akpm@linux-foundation.org \
    --cc=feng.han@honor.com \
    --cc=linux-mm@kvack.org \
    --cc=liulu.liu@honor.com \
    --cc=mhocko@suse.com \
    --cc=npache@redhat.com \
    --cc=rientjes@google.com \
    --cc=shakeel.butt@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox