Re: [PATCH] mm, oom: simplify task's refcount handling

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: Michal Hocko <mhocko@suse.com>
To: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: linux-mm@kvack.org, Andrew Morton <akpm@linux-foundation.org>,
	David Rientjes <rientjes@google.com>,
	Roman Gushchin <guro@fb.com>, Shakeel Butt <shakeelb@google.com>
Subject: Re: [PATCH] mm, oom: simplify task's refcount handling
Date: Wed, 24 Jul 2019 08:41:10 +0200	[thread overview]
Message-ID: <20190724064110.GC10882@dhcp22.suse.cz> (raw)
In-Reply-To: <1563940476-6162-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp>

On Wed 24-07-19 12:54:36, Tetsuo Handa wrote:
> Currently out_of_memory() is full of get_task_struct()/put_task_struct()
> calls. Since "mm, oom: avoid printk() iteration under RCU" introduced
> a list for holding a snapshot of all OOM victim candidates, let's share
> that list for select_bad_process() and oom_kill_process() in order to
> simplify task's refcount handling.
> 
> As a result of this patch, get_task_struct()/put_task_struct() calls
> in out_of_memory() are reduced to only 2 times respectively.

This is probably a matter of taste but the diffstat suggests to me that the
simplification is not all that great. On the other hand this makes the
oom handling even more tricky and harder for potential further
development - e.g. if we ever need to break the global lock down in the
future this would be another obstacle on the way. While potential
development might be too theoretical the benefit of the patch is not
really clear to me. The task_struct reference counting is not really
unusual operations and there is nothing really scary that we do with it
here. We already have to to extra mile wrt. task_lock so careful
reference count doesn't really jump out.

That being said, I do not think this patch gives any improvement.

> Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> Cc: Shakeel Butt <shakeelb@google.com>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: Roman Gushchin <guro@fb.com>
> Cc: David Rientjes <rientjes@google.com>
> ---
>  include/linux/sched.h |   2 +-
>  mm/oom_kill.c         | 122 ++++++++++++++++++++++++--------------------------
>  2 files changed, 60 insertions(+), 64 deletions(-)
> 
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 48c1a4c..4062999 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1247,7 +1247,7 @@ struct task_struct {
>  #ifdef CONFIG_MMU
>  	struct task_struct		*oom_reaper_list;
>  #endif
> -	struct list_head		oom_victim_list;
> +	struct list_head		oom_candidate;
>  #ifdef CONFIG_VMAP_STACK
>  	struct vm_struct		*stack_vm_area;
>  #endif
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index 110f948..311e0e9 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -63,6 +63,7 @@
>   * and mark_oom_victim
>   */
>  DEFINE_MUTEX(oom_lock);
> +static LIST_HEAD(oom_candidate_list);
>  
>  static inline bool is_memcg_oom(struct oom_control *oc)
>  {
> @@ -167,6 +168,41 @@ static bool oom_unkillable_task(struct task_struct *p)
>  	return false;
>  }
>  
> +static int add_candidate_task(struct task_struct *p, void *unused)
> +{
> +	if (!oom_unkillable_task(p)) {
> +		get_task_struct(p);
> +		list_add_tail(&p->oom_candidate, &oom_candidate_list);
> +	}
> +	return 0;
> +}
> +
> +static void link_oom_candidates(struct oom_control *oc)
> +{
> +	struct task_struct *p;
> +
> +	if (is_memcg_oom(oc))
> +		mem_cgroup_scan_tasks(oc->memcg, add_candidate_task, NULL);
> +	else {
> +		rcu_read_lock();
> +		for_each_process(p)
> +			add_candidate_task(p, NULL);
> +		rcu_read_unlock();
> +	}
> +
> +}
> +
> +static void unlink_oom_candidates(void)
> +{
> +	struct task_struct *p;
> +	struct task_struct *t;
> +
> +	list_for_each_entry_safe(p, t, &oom_candidate_list, oom_candidate) {
> +		list_del(&p->oom_candidate);
> +		put_task_struct(p);
> +	}
> +}
> +
>  /*
>   * Print out unreclaimble slabs info when unreclaimable slabs amount is greater
>   * than all user memory (LRU pages)
> @@ -344,16 +380,11 @@ static int oom_evaluate_task(struct task_struct *task, void *arg)
>  		goto next;
>  
>  select:
> -	if (oc->chosen)
> -		put_task_struct(oc->chosen);
> -	get_task_struct(task);
>  	oc->chosen = task;
>  	oc->chosen_points = points;
>  next:
>  	return 0;
>  abort:
> -	if (oc->chosen)
> -		put_task_struct(oc->chosen);
>  	oc->chosen = (void *)-1UL;
>  	return 1;
>  }
> @@ -364,27 +395,13 @@ static int oom_evaluate_task(struct task_struct *task, void *arg)
>   */
>  static void select_bad_process(struct oom_control *oc)
>  {
> -	if (is_memcg_oom(oc))
> -		mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc);
> -	else {
> -		struct task_struct *p;
> -
> -		rcu_read_lock();
> -		for_each_process(p)
> -			if (oom_evaluate_task(p, oc))
> -				break;
> -		rcu_read_unlock();
> -	}
> -}
> -
> +	struct task_struct *p;
>  
> -static int add_candidate_task(struct task_struct *p, void *arg)
> -{
> -	if (!oom_unkillable_task(p)) {
> -		get_task_struct(p);
> -		list_add_tail(&p->oom_victim_list, (struct list_head *) arg);
> +	list_for_each_entry(p, &oom_candidate_list, oom_candidate) {
> +		cond_resched();
> +		if (oom_evaluate_task(p, oc))
> +			break;
>  	}
> -	return 0;
>  }
>  
>  /**
> @@ -399,21 +416,12 @@ static int add_candidate_task(struct task_struct *p, void *arg)
>   */
>  static void dump_tasks(struct oom_control *oc)
>  {
> -	static LIST_HEAD(list);
>  	struct task_struct *p;
>  	struct task_struct *t;
>  
> -	if (is_memcg_oom(oc))
> -		mem_cgroup_scan_tasks(oc->memcg, add_candidate_task, &list);
> -	else {
> -		rcu_read_lock();
> -		for_each_process(p)
> -			add_candidate_task(p, &list);
> -		rcu_read_unlock();
> -	}
>  	pr_info("Tasks state (memory values in pages):\n");
>  	pr_info("[  pid  ]   uid  tgid total_vm      rss pgtables_bytes swapents oom_score_adj name\n");
> -	list_for_each_entry(p, &list, oom_victim_list) {
> +	list_for_each_entry(p, &oom_candidate_list, oom_candidate) {
>  		cond_resched();
>  		/* p may not have freeable memory in nodemask */
>  		if (!is_memcg_oom(oc) && !oom_cpuset_eligible(p, oc))
> @@ -430,10 +438,6 @@ static void dump_tasks(struct oom_control *oc)
>  			t->signal->oom_score_adj, t->comm);
>  		task_unlock(t);
>  	}
> -	list_for_each_entry_safe(p, t, &list, oom_victim_list) {
> -		list_del(&p->oom_victim_list);
> -		put_task_struct(p);
> -	}
>  }
>  
>  static void dump_oom_summary(struct oom_control *oc, struct task_struct *victim)
> @@ -859,17 +863,11 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
>  	bool can_oom_reap = true;
>  
>  	p = find_lock_task_mm(victim);
> -	if (!p) {
> -		put_task_struct(victim);
> +	if (!p)
>  		return;
> -	} else if (victim != p) {
> -		get_task_struct(p);
> -		put_task_struct(victim);
> -		victim = p;
> -	}
>  
> -	/* Get a reference to safely compare mm after task_unlock(victim) */
> -	mm = victim->mm;
> +	/* Get a reference to safely compare mm after task_unlock(p) */
> +	mm = p->mm;
>  	mmgrab(mm);
>  
>  	/* Raise event before sending signal: task reaper must see this */
> @@ -881,16 +879,15 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
>  	 * in order to prevent the OOM victim from depleting the memory
>  	 * reserves from the user space under its control.
>  	 */
> -	do_send_sig_info(SIGKILL, SEND_SIG_PRIV, victim, PIDTYPE_TGID);
> -	mark_oom_victim(victim);
> +	do_send_sig_info(SIGKILL, SEND_SIG_PRIV, p, PIDTYPE_TGID);
> +	mark_oom_victim(p);
>  	pr_err("%s: Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB, UID:%u\n",
> -		message, task_pid_nr(victim), victim->comm,
> -		K(victim->mm->total_vm),
> -		K(get_mm_counter(victim->mm, MM_ANONPAGES)),
> -		K(get_mm_counter(victim->mm, MM_FILEPAGES)),
> -		K(get_mm_counter(victim->mm, MM_SHMEMPAGES)),
> -		from_kuid(&init_user_ns, task_uid(victim)));
> -	task_unlock(victim);
> +	       message, task_pid_nr(p), p->comm, K(mm->total_vm),
> +	       K(get_mm_counter(mm, MM_ANONPAGES)),
> +	       K(get_mm_counter(mm, MM_FILEPAGES)),
> +	       K(get_mm_counter(mm, MM_SHMEMPAGES)),
> +	       from_kuid(&init_user_ns, task_uid(p)));
> +	task_unlock(p);
>  
>  	/*
>  	 * Kill all user processes sharing victim->mm in other thread groups, if
> @@ -929,7 +926,6 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
>  		wake_oom_reaper(victim);
>  
>  	mmdrop(mm);
> -	put_task_struct(victim);
>  }
>  #undef K
>  
> @@ -940,10 +936,8 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
>  static int oom_kill_memcg_member(struct task_struct *task, void *message)
>  {
>  	if (task->signal->oom_score_adj != OOM_SCORE_ADJ_MIN &&
> -	    !is_global_init(task)) {
> -		get_task_struct(task);
> +	    !is_global_init(task))
>  		__oom_kill_process(task, message);
> -	}
>  	return 0;
>  }
>  
> @@ -964,7 +958,6 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
>  		mark_oom_victim(victim);
>  		wake_oom_reaper(victim);
>  		task_unlock(victim);
> -		put_task_struct(victim);
>  		return;
>  	}
>  	task_unlock(victim);
> @@ -1073,6 +1066,8 @@ bool out_of_memory(struct oom_control *oc)
>  	if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS))
>  		return true;
>  
> +	link_oom_candidates(oc);
> +
>  	/*
>  	 * Check if there were limitations on the allocation (only relevant for
>  	 * NUMA and memcg) that may require different handling.
> @@ -1086,10 +1081,9 @@ bool out_of_memory(struct oom_control *oc)
>  	    current->mm && !oom_unkillable_task(current) &&
>  	    oom_cpuset_eligible(current, oc) &&
>  	    current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
> -		get_task_struct(current);
>  		oc->chosen = current;
>  		oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)");
> -		return true;
> +		goto done;
>  	}
>  
>  	select_bad_process(oc);
> @@ -1108,6 +1102,8 @@ bool out_of_memory(struct oom_control *oc)
>  	if (oc->chosen && oc->chosen != (void *)-1UL)
>  		oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" :
>  				 "Memory cgroup out of memory");
> + done:
> +	unlink_oom_candidates();
>  	return !!oc->chosen;
>  }
>  
> -- 
> 1.8.3.1

-- 
Michal Hocko
SUSE Labs

next prev parent reply	other threads:[~2019-07-24  6:41 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-24  3:54 Tetsuo Handa
2019-07-24  6:41 ` Michal Hocko [this message]
2019-07-24  7:37   ` Tetsuo Handa
2019-07-24  8:07     ` Michal Hocko
2019-07-26  3:17       ` Tetsuo Handa

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190724064110.GC10882@dhcp22.suse.cz \
    --to=mhocko@suse.com \
    --cc=akpm@linux-foundation.org \
    --cc=guro@fb.com \
    --cc=linux-mm@kvack.org \
    --cc=penguin-kernel@I-love.SAKURA.ne.jp \
    --cc=rientjes@google.com \
    --cc=shakeelb@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox