[PATCH] mm: delay oom_reaper only for the process using robust-futex

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

* [PATCH] mm: delay oom_reaper only for the process using robust-futex
@ 2025-07-31 10:29 zhongjinji
  2025-07-31 16:54 ` Shakeel Butt
  2025-07-31 22:28 ` kernel test robot
  0 siblings, 2 replies; 4+ messages in thread
From: zhongjinji @ 2025-07-31 10:29 UTC (permalink / raw)
  To: linux-mm
  Cc: akpm, mhocko, rientjes, shakeel.butt, npache, liulu.liu, feng.han

From: zhongjinji <zhongjinji@honor.com>

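Since the patch here:
https://lore.kernel.org/all/20220414144042.677008-1-npache@redhat.com/T/#u
was merged, every reap is delayed by OOM_REAPER_DELAY (2*HZ), so the
oom_reaper almost stops working: the victim has usually exited before the
delay expires.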

But I noticed that many processes do not use robust-futex, so they don’t
access user-space memory during do_exit and don’t run into the problem
mentioned in that patch.

So, this change delays the oom_reaper only when the process uses
robust-futex, letting the oom_reaper work properly in more cases.

Signed-off-by: zhongjinji <zhongjinji@honor.com>
---
 mm/oom_kill.c | 41 ++++++++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 11 deletions(-)

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 25923cfec9c6..7e74dc0ac2a6 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -692,15 +692,18 @@ static void wake_oom_reaper(struct timer_list *timer)
  * before the exit path is able to wake the futex waiters.
  */
 #define OOM_REAPER_DELAY (2*HZ)
-static void queue_oom_reaper(struct task_struct *tsk)
+static void queue_oom_reaper(struct task_struct *tsk, bool may_access_user)
 {
+	unsigned long reaper_delay = 0;
+
 	/* mm is already queued? */
 	if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
 		return;
-
+	if (may_access_user)
+		reaper_delay = OOM_REAPER_DELAY;
 	get_task_struct(tsk);
 	timer_setup(&tsk->oom_reaper_timer, wake_oom_reaper, 0);
-	tsk->oom_reaper_timer.expires = jiffies + OOM_REAPER_DELAY;
+	tsk->oom_reaper_timer.expires = jiffies + reaper_delay;
 	add_timer(&tsk->oom_reaper_timer);
 }
 
@@ -742,7 +745,7 @@ static int __init oom_init(void)
 }
 subsys_initcall(oom_init)
 #else
-static inline void queue_oom_reaper(struct task_struct *tsk)
+static inline void queue_oom_reaper(struct task_struct *tsk, bool may_access_user)
 {
 }
 #endif /* CONFIG_MMU */
@@ -864,6 +867,11 @@ static inline bool __task_will_free_mem(struct task_struct *task)
 	return false;
 }
 
+static inline bool exit_may_access_user(struct task_struct *task)
+{
+	return task->robust_list || task->compat_robust_list;
+}
+
 /*
  * Checks whether the given task is dying or exiting and likely to
  * release its address space. This means that all threads and processes
@@ -871,11 +879,12 @@ static inline bool __task_will_free_mem(struct task_struct *task)
  * Caller has to make sure that task->mm is stable (hold task_lock or
  * it operates on the current).
  */
-static bool task_will_free_mem(struct task_struct *task)
+static bool task_will_free_mem(struct task_struct *task, bool *may_access_user)
 {
 	struct mm_struct *mm = task->mm;
 	struct task_struct *p;
 	bool ret = true;
+	bool access = false;
 
 	/*
 	 * Skip tasks without mm because it might have passed its exit_mm and
@@ -888,6 +897,8 @@ static bool task_will_free_mem(struct task_struct *task)
 	if (!__task_will_free_mem(task))
 		return false;
 
+	access |= exit_may_access_user(task);
+
 	/*
 	 * This task has already been drained by the oom reaper so there are
 	 * only small chances it will free some more
@@ -912,8 +923,11 @@ static bool task_will_free_mem(struct task_struct *task)
 		ret = __task_will_free_mem(p);
 		if (!ret)
 			break;
+		access |= exit_may_access_user(p);
 	}
 	rcu_read_unlock();
+	if (may_access_user)
+		*may_access_user = access;
 
 	return ret;
 }
@@ -923,6 +937,7 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
 	struct task_struct *p;
 	struct mm_struct *mm;
 	bool can_oom_reap = true;
+	bool may_access_user = false;
 
 	p = find_lock_task_mm(victim);
 	if (!p) {
@@ -950,6 +965,7 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
 	 * reserves from the user space under its control.
 	 */
 	do_send_sig_info(SIGKILL, SEND_SIG_PRIV, victim, PIDTYPE_TGID);
+	may_access_user |= exit_may_access_user(victim);
 	mark_oom_victim(victim);
 	pr_err("%s: Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB, UID:%u pgtables:%lukB oom_score_adj:%hd\n",
 		message, task_pid_nr(victim), victim->comm, K(mm->total_vm),
@@ -990,11 +1006,12 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
 		if (unlikely(p->flags & PF_KTHREAD))
 			continue;
 		do_send_sig_info(SIGKILL, SEND_SIG_PRIV, p, PIDTYPE_TGID);
+		may_access_user |= exit_may_access_user(p);
 	}
 	rcu_read_unlock();
 
 	if (can_oom_reap)
-		queue_oom_reaper(victim);
+		queue_oom_reaper(victim, may_access_user);
 
 	mmdrop(mm);
 	put_task_struct(victim);
@@ -1020,6 +1037,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 	struct mem_cgroup *oom_group;
 	static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
 					      DEFAULT_RATELIMIT_BURST);
+	bool may_access_user = false;
 
 	/*
 	 * If the task is already exiting, don't alarm the sysadmin or kill
@@ -1027,9 +1045,9 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 	 * so it can die quickly
 	 */
 	task_lock(victim);
-	if (task_will_free_mem(victim)) {
+	if (task_will_free_mem(victim, &may_access_user)) {
 		mark_oom_victim(victim);
-		queue_oom_reaper(victim);
+		queue_oom_reaper(victim, may_access_user);
 		task_unlock(victim);
 		put_task_struct(victim);
 		return;
@@ -1112,6 +1130,7 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier);
 bool out_of_memory(struct oom_control *oc)
 {
 	unsigned long freed = 0;
+	bool may_access_user = false;
 
 	if (oom_killer_disabled)
 		return false;
@@ -1128,9 +1147,9 @@ bool out_of_memory(struct oom_control *oc)
 	 * select it.  The goal is to allow it to allocate so that it may
 	 * quickly exit and free its memory.
 	 */
-	if (task_will_free_mem(current)) {
+	if (task_will_free_mem(current, &may_access_user)) {
 		mark_oom_victim(current);
-		queue_oom_reaper(current);
+		queue_oom_reaper(current, may_access_user);
 		return true;
 	}
 
@@ -1231,7 +1250,7 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
 	mm = p->mm;
 	mmgrab(mm);
 
-	if (task_will_free_mem(p))
+	if (task_will_free_mem(p, NULL))
 		reap = true;
 	else {
 		/* Error only if the work has not been done already */
-- 
2.17.1



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] mm: delay oom_reaper only for the process using robust-futex
  2025-07-31 10:29 [PATCH] mm: delay oom_reaper only for the process using robust-futex zhongjinji
@ 2025-07-31 16:54 ` Shakeel Butt
  2025-08-01 13:20   ` zhongjinji
  2025-07-31 22:28 ` kernel test robot
  1 sibling, 1 reply; 4+ messages in thread
From: Shakeel Butt @ 2025-07-31 16:54 UTC (permalink / raw)
  To: zhongjinji
  Cc: linux-mm, akpm, mhocko, rientjes, npache, liulu.liu, feng.han, jsavitz

Hi Zhongjinji,

On Thu, Jul 31, 2025 at 06:29:04PM +0800, zhongjinji@honor.com wrote:
> From: zhongjinji <zhongjinji@honor.com>
> 
> After the patch here:
> https://lore.kernel.org/all/20220414144042.677008-1-npache@redhat.com/T/#u
> was merged, the oom_reaper almost stops working.

Can you expand on this? How exactly did it stop working? Is this because
oom-killed processes are exiting in a timely fashion, or are you seeing the
system remain in a memory pressure state for longer?

> 
> But I noticed that many processes do not use robust-futex, so they don’t
> access user-space memory during do_exit and don’t run into the problem
> mentioned in that patch.
> 
> So, this change delays the oom_reaper only when the process uses
> robust-futex, letting the oom_reaper work properly in more cases.

The direction seems reasonable.

>  
> +static inline bool exit_may_access_user(struct task_struct *task)
> +{
> +	return task->robust_list || task->compat_robust_list;

Here I am not sure. This robust_list seems to be a per-task list, and we
are making a process-level decision based on a given task's usage of the
robust list. Can we have a scenario where some tasks/threads of a
process do not have a robust list and others do? If yes, this can cause
an issue similar to the one the original patch tried to solve, right?

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] mm: delay oom_reaper only for the process using robust-futex
  2025-07-31 10:29 [PATCH] mm: delay oom_reaper only for the process using robust-futex zhongjinji
  2025-07-31 16:54 ` Shakeel Butt
@ 2025-07-31 22:28 ` kernel test robot
  1 sibling, 0 replies; 4+ messages in thread
From: kernel test robot @ 2025-07-31 22:28 UTC (permalink / raw)
  To: zhongjinji, linux-mm
  Cc: oe-kbuild-all, akpm, mhocko, rientjes, shakeel.butt, npache,
	liulu.liu, feng.han

Hi,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-everything]

url:    https://github.com/intel-lab-lkp/linux/commits/zhongjinji-honor-com/mm-delay-oom_reaper-only-for-the-process-using-robust-futex/20250731-183058
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20250731102904.8615-1-zhongjinji%40honor.com
patch subject: [PATCH] mm: delay oom_reaper only for the process using robust-futex
config: i386-buildonly-randconfig-002-20250801 (https://download.01.org/0day-ci/archive/20250801/202508010627.P8Xrr4xy-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14+deb12u1) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250801/202508010627.P8Xrr4xy-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202508010627.P8Xrr4xy-lkp@intel.com/

All error/warnings (new ones prefixed by >>):

   mm/oom_kill.c: In function 'exit_may_access_user':
>> mm/oom_kill.c:872:20: error: 'struct task_struct' has no member named 'robust_list'
     872 |         return task->robust_list || task->compat_robust_list;
         |                    ^~
>> mm/oom_kill.c:872:41: error: 'struct task_struct' has no member named 'compat_robust_list'
     872 |         return task->robust_list || task->compat_robust_list;
         |                                         ^~
>> mm/oom_kill.c:873:1: warning: control reaches end of non-void function [-Wreturn-type]
     873 | }
         | ^


vim +872 mm/oom_kill.c

   869	
   870	static inline bool exit_may_access_user(struct task_struct *task)
   871	{
 > 872		return task->robust_list || task->compat_robust_list;
 > 873	}
   874	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] mm: delay oom_reaper only for the process using robust-futex
  2025-07-31 16:54 ` Shakeel Butt
@ 2025-08-01 13:20   ` zhongjinji
  0 siblings, 0 replies; 4+ messages in thread
From: zhongjinji @ 2025-08-01 13:20 UTC (permalink / raw)
  To: shakeel.butt
  Cc: akpm, feng.han, jsavitz, linux-mm, liulu.liu, mhocko, npache,
	rientjes, zhongjinji

Yeah, 2 seconds is enough time for any process on Android to exit,
so the oom reaper basically never gets to do any work.
Currently, we have not found any users of robust futex on Android.
As we know, adding killed processes to the oom_reaper queue can help
free their memory faster.
Delaying the oom reaper for all processes might lead to less efficient
memory reclamation, which could be undesirable.
>
>> 
>> But I noticed that many processes do not use robust-futex, so they do not
>> access user-space memory during do_exit and do not run into the problem
>> mentioned in that patch.
>> 
>> So, this change delays the oom_reaper only when the process uses
>> robust-futex, letting the oom_reaper work properly in more cases.
>
>The direction seems reasonable.
>
>>  
>> +static inline bool exit_may_access_user(struct task_struct *task)
>> +{
>> +	return task->robust_list || task->compat_robust_list;
>
>Here I am not sure. This robust_list seems to be a per-task list, and we
>are making a process-level decision based on a given task's usage of the
>robust list. Can we have a scenario where some tasks/threads of a
>process do not have a robust list and others do? If yes, this can cause
>an issue similar to the one the original patch tried to solve, right?

Yes, we definitely need to check whether all threads have a robust list.
Thanks for the reminder.
I will include this fix in the next version.



^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2025-08-01 13:20 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-07-31 10:29 [PATCH] mm: delay oom_reaper only for the process using robust-futex zhongjinji
2025-07-31 16:54 ` Shakeel Butt
2025-08-01 13:20   ` zhongjinji
2025-07-31 22:28 ` kernel test robot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox