linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Michal Hocko <mhocko@suse.cz>
To: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: linux-mm@kvack.org, akpm@linux-foundation.org, oleg@redhat.com,
	rientjes@google.com, vdavydov@parallels.com, mst@redhat.com
Subject: Re: [PATCH 8/8] mm,oom_reaper: Make OOM reaper use list of mm_struct.
Date: Thu, 7 Jul 2016 16:15:50 +0200	[thread overview]
Message-ID: <20160707141548.GO5379@dhcp22.suse.cz> (raw)
In-Reply-To: <201607031141.FII82373.FMHQLFOOtVSJOF@I-love.SAKURA.ne.jp>

On Sun 03-07-16 11:41:59, Tetsuo Handa wrote:
> >From ce97a7f6b94a6003bc2b41e5f69da2fedc934a9d Mon Sep 17 00:00:00 2001
> From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> Date: Sat, 2 Jul 2016 23:10:06 +0900
> Subject: [PATCH 8/8] mm,oom_reaper: Make OOM reaper use list of mm_struct.
> 
> Since OOM reaping is per mm_struct operation, it is natural to use
> list of mm_struct used by OOM victims.

I would expect that this would be an immediate follow up for "[PATCH 3/8]
mm,oom: Use list of mm_struct used by OOM victims."

> This patch eliminates find_lock_task_mm() usage from the OOM reaper.

It would be good to explain what are the consequences.

> This patch fixes what commit 36324a990cf578b5 ("oom: clear TIF_MEMDIE
> after oom_reaper managed to unmap the address space") and
> commit 449d777d7ad6d7f9 ("mm, oom_reaper: clear TIF_MEMDIE for all tasks
> queued for oom_reaper") tried to address, by always calling exit_oom_mm()
> after OOM reap attempt was made.

And rather than mentioning the above two commits it would be better to
explain what is the difference wrt. to the current implementation.

Other than that it looks good to me.

> 
> Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> ---
>  include/linux/oom.h   |  8 -----
>  include/linux/sched.h |  3 --
>  mm/memcontrol.c       |  1 -
>  mm/oom_kill.c         | 84 +++++++++++++++++----------------------------------
>  4 files changed, 27 insertions(+), 69 deletions(-)
> 
> diff --git a/include/linux/oom.h b/include/linux/oom.h
> index 4844325..1a212c1 100644
> --- a/include/linux/oom.h
> +++ b/include/linux/oom.h
> @@ -71,14 +71,6 @@ static inline bool oom_task_origin(const struct task_struct *p)
>  
>  extern void mark_oom_victim(struct task_struct *tsk, struct oom_control *oc);
>  
> -#ifdef CONFIG_MMU
> -extern void wake_oom_reaper(struct task_struct *tsk);
> -#else
> -static inline void wake_oom_reaper(struct task_struct *tsk)
> -{
> -}
> -#endif
> -
>  extern unsigned long oom_badness(struct task_struct *p,
>  		struct mem_cgroup *memcg, const nodemask_t *nodemask,
>  		unsigned long totalpages);
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index f472f27..4379279 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1916,9 +1916,6 @@ struct task_struct {
>  	unsigned long	task_state_change;
>  #endif
>  	int pagefault_disabled;
> -#ifdef CONFIG_MMU
> -	struct task_struct *oom_reaper_list;
> -#endif
>  /* CPU-specific state of this task */
>  	struct thread_struct thread;
>  /*
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index c17160d..9acc840 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1236,7 +1236,6 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
>  	 */
>  	if (task_will_free_mem(current)) {
>  		mark_oom_victim(current, &oc);
> -		wake_oom_reaper(current);
>  		goto unlock;
>  	}
>  
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index 317ce2c..bdc192f 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -468,8 +468,6 @@ bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
>   * victim (if that is possible) to help the OOM killer to move on.
>   */
>  static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
> -static struct task_struct *oom_reaper_list;
> -static DEFINE_SPINLOCK(oom_reaper_lock);
>  
>  static bool __oom_reap_vmas(struct mm_struct *mm)
>  {
> @@ -540,30 +538,27 @@ static bool __oom_reap_vmas(struct mm_struct *mm)
>  }
>  
>  #define MAX_OOM_REAP_RETRIES 10
> -static void oom_reap_task(struct task_struct *tsk)
> +static void oom_reap_vmas(struct mm_struct *mm)
>  {
>  	int attempts = 0;
> -	struct mm_struct *mm = NULL;
> -	struct task_struct *p = find_lock_task_mm(tsk);
> +	bool ret;
>  
>  	/*
> -	 * Make sure we find the associated mm_struct even when the particular
> -	 * thread has already terminated and cleared its mm.
> -	 * We might have race with exit path so consider our work done if there
> -	 * is no mm.
> +	 * Check MMF_OOM_REAPED after holding oom_lock because
> +	 * oom_kill_process() might find this mm pinned.
>  	 */
> -	if (!p)
> -		goto done;
> -	mm = p->mm;
> -	atomic_inc(&mm->mm_count);
> -	task_unlock(p);
> +	mutex_lock(&oom_lock);
> +	ret = test_bit(MMF_OOM_REAPED, &mm->flags);
> +	mutex_unlock(&oom_lock);
> +	if (ret)
> +		return;
>  
>  	/* Retry the down_read_trylock(mmap_sem) a few times */
>  	while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_vmas(mm))
>  		schedule_timeout_idle(HZ/10);
>  
>  	if (attempts <= MAX_OOM_REAP_RETRIES)
> -		goto done;
> +		return;
>  
>  	pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
>  		mm->oom_mm.pid, mm->oom_mm.comm);
> @@ -580,51 +575,30 @@ static void oom_reap_task(struct task_struct *tsk)
>  		set_bit(MMF_OOM_REAPED, &mm->flags);
>  	}
>  	debug_show_all_locks();
> -
> -done:
> -	tsk->oom_reaper_list = NULL;
> -	/* Drop a reference taken by wake_oom_reaper */
> -	put_task_struct(tsk);
> -	/* Drop a reference taken above. */
> -	if (mm)
> -		mmdrop(mm);
>  }
>  
>  static int oom_reaper(void *unused)
>  {
>  	while (true) {
> -		struct task_struct *tsk = NULL;
> -
> -		wait_event_freezable(oom_reaper_wait, oom_reaper_list != NULL);
> -		spin_lock(&oom_reaper_lock);
> -		if (oom_reaper_list != NULL) {
> -			tsk = oom_reaper_list;
> -			oom_reaper_list = tsk->oom_reaper_list;
> -		}
> -		spin_unlock(&oom_reaper_lock);
> -
> -		if (tsk)
> -			oom_reap_task(tsk);
> +		struct mm_struct *mm = NULL;
> +
> +		wait_event_freezable(oom_reaper_wait,
> +				     !list_empty(&oom_mm_list));
> +		spin_lock(&oom_mm_lock);
> +		if (!list_empty(&oom_mm_list))
> +			mm = list_first_entry(&oom_mm_list, struct mm_struct,
> +					      oom_mm.list);
> +		spin_unlock(&oom_mm_lock);
> +		if (!mm)
> +			continue;
> +		oom_reap_vmas(mm);
> +		/* Drop a reference taken by mark_oom_victim(). */
> +		exit_oom_mm(mm);
>  	}
>  
>  	return 0;
>  }
>  
> -void wake_oom_reaper(struct task_struct *tsk)
> -{
> -	/* tsk is already queued? */
> -	if (tsk == oom_reaper_list || tsk->oom_reaper_list)
> -		return;
> -
> -	get_task_struct(tsk);
> -
> -	spin_lock(&oom_reaper_lock);
> -	tsk->oom_reaper_list = oom_reaper_list;
> -	oom_reaper_list = tsk;
> -	spin_unlock(&oom_reaper_lock);
> -	wake_up(&oom_reaper_wait);
> -}
> -
>  static int __init oom_init(void)
>  {
>  	kthread_run(oom_reaper, NULL, "oom_reaper");
> @@ -667,6 +641,9 @@ void mark_oom_victim(struct task_struct *tsk, struct oom_control *oc)
>  		mm->oom_mm.pid = task_pid_nr(tsk);
>  #endif
>  		list_add_tail(&mm->oom_mm.list, &oom_mm_list);
> +#ifdef CONFIG_MMU
> +		wake_up(&oom_reaper_wait);
> +#endif
>  	}
>  	spin_unlock(&oom_mm_lock);
>  	/*
> @@ -816,7 +793,6 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
>  	unsigned int victim_points = 0;
>  	static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
>  					      DEFAULT_RATELIMIT_BURST);
> -	bool can_oom_reap = true;
>  
>  	/*
>  	 * If the task is already exiting, don't alarm the sysadmin or kill
> @@ -825,7 +801,6 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
>  	task_lock(p);
>  	if (task_will_free_mem(p)) {
>  		mark_oom_victim(p, oc);
> -		wake_oom_reaper(p);
>  		task_unlock(p);
>  		put_task_struct(p);
>  		return;
> @@ -915,7 +890,6 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
>  			 * memory might be still used. Hide the mm from the oom
>  			 * killer to guarantee OOM forward progress.
>  			 */
> -			can_oom_reap = false;
>  			set_bit(MMF_OOM_REAPED, &mm->flags);
>  			pr_info("oom killer %d (%s) has mm pinned by %d (%s)\n",
>  					task_pid_nr(victim), victim->comm,
> @@ -926,9 +900,6 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
>  	}
>  	rcu_read_unlock();
>  
> -	if (can_oom_reap)
> -		wake_oom_reaper(victim);
> -
>  	mmdrop(mm);
>  	put_task_struct(victim);
>  }
> @@ -1004,7 +975,6 @@ bool out_of_memory(struct oom_control *oc)
>  	 */
>  	if (task_will_free_mem(current)) {
>  		mark_oom_victim(current, oc);
> -		wake_oom_reaper(current);
>  		return true;
>  	}
>  
> -- 
> 1.8.3.1

-- 
Michal Hocko
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2016-07-07 14:15 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-07-03  2:35 [PATCH 0/8] Change OOM killer to " Tetsuo Handa
2016-07-03  2:36 ` [PATCH 1/8] mm,oom_reaper: Remove pointless kthread_run() failure check Tetsuo Handa
2016-07-03 12:42   ` Oleg Nesterov
2016-07-03 16:03     ` Tetsuo Handa
2016-07-03 17:10       ` Oleg Nesterov
2016-07-03 21:53         ` Tetsuo Handa
2016-07-04 11:13           ` Oleg Nesterov
2016-07-07 11:14           ` Michal Hocko
2016-07-03  2:37 ` [PATCH 2/8] mm,oom_reaper: Reduce find_lock_task_mm() usage Tetsuo Handa
2016-07-07 11:19   ` Michal Hocko
2016-07-03  2:38 ` [PATCH 3/8] mm,oom: Use list of mm_struct used by OOM victims Tetsuo Handa
2016-07-04 10:39   ` Oleg Nesterov
2016-07-04 12:50     ` Tetsuo Handa
2016-07-04 18:25       ` Oleg Nesterov
2016-07-05 10:43         ` Tetsuo Handa
2016-07-05 20:52           ` Oleg Nesterov
2016-07-06  8:53             ` Oleg Nesterov
2016-07-06 11:43               ` Tetsuo Handa
2016-07-07 11:31   ` Michal Hocko
2016-07-03  2:39 ` [PATCH 4/8] mm,oom: Remove OOM_SCAN_ABORT case Tetsuo Handa
2016-07-03  2:40 ` [PATCH 5/8] mm,oom: Remove unused signal_struct->oom_victims Tetsuo Handa
2016-07-07 14:03   ` Michal Hocko
2016-07-03  2:40 ` [PATCH 6/8] mm,oom_reaper: Stop clearing TIF_MEMDIE on remote thread Tetsuo Handa
2016-07-07 14:06   ` Michal Hocko
2016-07-07 16:10     ` Tetsuo Handa
2016-07-07 16:54       ` Michal Hocko
2016-07-03  2:41 ` [PATCH 7/8] mm,oom_reaper: Pass OOM victim's comm and pid values via mm_struct Tetsuo Handa
2016-07-03  2:41 ` [PATCH 8/8] mm,oom_reaper: Make OOM reaper use list of mm_struct Tetsuo Handa
2016-07-04 10:40   ` Oleg Nesterov
2016-07-07 14:15   ` Michal Hocko [this message]
2016-07-07 11:04 ` [PATCH 0/8] Change OOM killer to " Michal Hocko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160707141548.GO5379@dhcp22.suse.cz \
    --to=mhocko@suse.cz \
    --cc=akpm@linux-foundation.org \
    --cc=linux-mm@kvack.org \
    --cc=mst@redhat.com \
    --cc=oleg@redhat.com \
    --cc=penguin-kernel@I-love.SAKURA.ne.jp \
    --cc=rientjes@google.com \
    --cc=vdavydov@parallels.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox