linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: David Rientjes <rientjes@google.com>
To: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Rik van Riel <riel@redhat.com>, Paul Menage <menage@google.com>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: Re: [patch -mm v2] mm: introduce oom_adj_child
Date: Fri, 31 Jul 2009 12:38:24 -0700 (PDT)	[thread overview]
Message-ID: <alpine.DEB.2.00.0907311225480.22732@chino.kir.corp.google.com> (raw)
In-Reply-To: <20090731154823.B6EF.A69D9226@jp.fujitsu.com>

On Fri, 31 Jul 2009, KOSAKI Motohiro wrote:

> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index 3ce5ae9..c64499e 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -1008,7 +1008,7 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
>  		return -ESRCH;
>  	task_lock(task);
>  	if (task->mm)
> -		oom_adjust = task->mm->oom_adj;
> +		oom_adjust = task->signal->oom_adj;
>  	else
>  		oom_adjust = OOM_DISABLE;
>  	task_unlock(task);

This may display a /proc/pid/oom_adj that is radically different from 
task->mm->oom_adj_cached without userspace's knowledge, and you can't 
simply display task->mm->oom_adj_cached here because it gets reset on every 
write to /proc/pid/oom_adj.

> @@ -1046,12 +1046,13 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
>  		put_task_struct(task);
>  		return -EINVAL;
>  	}
> -	if (oom_adjust < task->mm->oom_adj && !capable(CAP_SYS_RESOURCE)) {
> +	if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) {
>  		task_unlock(task);
>  		put_task_struct(task);
>  		return -EACCES;
>  	}
> -	task->mm->oom_adj = oom_adjust;
> +	task->signal->oom_adj = oom_adjust;
> +	task->mm->oom_adj_cached = OOM_CACHE_DEFAULT;
>  	task_unlock(task);
>  	put_task_struct(task);
>  	if (end - buffer == 0)
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 7acc843..f93f97f 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -240,7 +240,8 @@ struct mm_struct {
>  
>  	unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
>  
> -	s8 oom_adj;	/* OOM kill score adjustment (bit shift) */
> +	s8 oom_adj_cached;	/* mirror from signal_struct->oom_adj.
> +				   in vfork case, multiple processes use the same mm. */
>  
>  	cpumask_t cpu_vm_mask;
>  
> diff --git a/include/linux/oom.h b/include/linux/oom.h
> index a7979ba..a219480 100644
> --- a/include/linux/oom.h
> +++ b/include/linux/oom.h
> @@ -3,6 +3,7 @@
>  
>  /* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */
>  #define OOM_DISABLE (-17)
> +#define OOM_CACHE_DEFAULT (15)
>  /* inclusive */
>  #define OOM_ADJUST_MIN (-16)
>  #define OOM_ADJUST_MAX 15
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 3ab08e4..e10b12b 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -629,6 +629,8 @@ struct signal_struct {
>  	unsigned audit_tty;
>  	struct tty_audit_buf *tty_audit_buf;
>  #endif
> +
> +	s8 oom_adj;	/* OOM kill score adjustment (bit shift) */
>  };
>  
>  /* Context switch must be unlocked if interrupts are to be enabled */

I don't believe oom_adj is an appropriate use of signal_struct, sorry.

> diff --git a/kernel/exit.c b/kernel/exit.c
> index 869dc22..c741a45 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -688,6 +689,7 @@ static void exit_mm(struct task_struct * tsk)
>  	enter_lazy_tlb(mm, current);
>  	/* We don't want this task to be frozen prematurely */
>  	clear_freeze_flag(tsk);
> +	mm->oom_adj_cached = OOM_CACHE_DEFAULT;
>  	task_unlock(tsk);
>  	mm_update_next_owner(mm);
>  	mmput(mm);

This is similar to an early proposal that wanted to keep an array of 
oom_adj values for tasks attached to the mm in mm_struct.  The problem is 
that you're obviously losing information about all threads attached to the 
mm any time one of the threads exits or writes to /proc/pid/oom_adj.  That 
information can only be regenerated with a tasklist scan.

> diff --git a/kernel/fork.c b/kernel/fork.c
> index 9b42695..b7cb474 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -426,6 +427,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
>  	init_rwsem(&mm->mmap_sem);
>  	INIT_LIST_HEAD(&mm->mmlist);
>  	mm->flags = (current->mm) ? current->mm->flags : default_dump_filter;
> +	mm->oom_adj_cached = OOM_CACHE_DEFAULT;
>  	mm->core_state = NULL;
>  	mm->nr_ptes = 0;
>  	set_mm_counter(mm, file_rss, 0);
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index 175a67a..eae2d78 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -58,7 +58,7 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
>  	unsigned long points, cpu_time, run_time;
>  	struct mm_struct *mm;
>  	struct task_struct *child;
> -	int oom_adj;
> +	s8 oom_adj;
>  
>  	task_lock(p);
>  	mm = p->mm;
> @@ -66,7 +66,10 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
>  		task_unlock(p);
>  		return 0;
>  	}
> -	oom_adj = mm->oom_adj;
> +
> +	if (mm->oom_adj_cached < p->signal->oom_adj)
> +		mm->oom_adj_cached = p->signal->oom_adj;

This conditional will never be true since mm->oom_adj_cached is 
initialized to 15, which is the upper bound on what p->signal->oom_adj 
may ever be, so mm->oom_adj_cached never gets changed from 
OOM_CACHE_DEFAULT.

Thus, this patch doesn't even work, and you probably would have noticed 
that if you'd checked /proc/pid/oom_score for any pid.

Even if mm->oom_adj_cached _was_ properly updated here, 
/proc/pid/oom_score would be out of sync with more negative oom_adj values 
for threads sharing the mm_struct since it calls badness() for only a 
single thread.

> +	oom_adj = mm->oom_adj_cached;
>  	if (oom_adj == OOM_DISABLE) {
>  		task_unlock(p);
>  		return 0;
> @@ -350,7 +354,7 @@ static int oom_kill_task(struct task_struct *p)
>  
>  	task_lock(p);
>  	mm = p->mm;
> -	if (!mm || mm->oom_adj == OOM_DISABLE) {
> +	if (!mm || p->signal->oom_adj == OOM_DISABLE) {
>  		task_unlock(p);
>  		return 1;
>  	}

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2009-07-31 19:38 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-07-29  4:27 David Rientjes
2009-07-29 23:13 ` Andrew Morton
2009-07-29 23:25   ` Paul Menage
2009-07-30  2:32 ` KOSAKI Motohiro
2009-07-30  7:06   ` David Rientjes
2009-07-31  6:47     ` KOSAKI Motohiro
2009-07-31  9:31       ` David Rientjes
2009-08-03 11:58         ` KOSAKI Motohiro
2009-08-03 12:12           ` KOSAKI Motohiro
2009-07-30  9:00 ` KAMEZAWA Hiroyuki
2009-07-30  9:31   ` David Rientjes
2009-07-30 10:02     ` KAMEZAWA Hiroyuki
2009-07-30 19:05       ` David Rientjes
2009-07-31  0:33         ` KAMEZAWA Hiroyuki
2009-07-31  6:50           ` KOSAKI Motohiro
2009-07-31 19:38             ` David Rientjes [this message]
2009-08-03 12:16               ` KOSAKI Motohiro
2009-07-31  9:36           ` David Rientjes
2009-07-31 10:49             ` KAMEZAWA Hiroyuki
2009-07-31 19:18               ` David Rientjes
2009-08-01  1:10                 ` KAMEZAWA Hiroyuki
2009-08-01 20:26                   ` David Rientjes
2009-08-03  1:42                     ` KAMEZAWA Hiroyuki
2009-08-03  7:59                       ` David Rientjes
2009-08-03  8:02                         ` KAMEZAWA Hiroyuki
2009-08-03  8:08                           ` David Rientjes
2009-08-03  8:45                             ` KAMEZAWA Hiroyuki
2009-08-03  8:55                               ` KAMEZAWA Hiroyuki
2009-08-03 12:19                                 ` KOSAKI Motohiro
2009-08-03 12:32                         ` KOSAKI Motohiro
2009-08-03 12:21                     ` KOSAKI Motohiro
2009-08-03 16:17                     ` Paul Menage

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=alpine.DEB.2.00.0907311225480.22732@chino.kir.corp.google.com \
    --to=rientjes@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=kosaki.motohiro@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=menage@google.com \
    --cc=riel@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox