linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Serge E. Hallyn" <serge@hallyn.com>
To: Matt Helsley <matthltc@us.ibm.com>
Cc: linux-mm@kvack.org, LKML <linux-kernel@vger.kernel.org>
Subject: Re: [RFC][PATCH] Replacing the /proc/<pid|self>/exe symlink code
Date: Wed, 30 May 2007 13:09:23 -0500	[thread overview]
Message-ID: <20070530180923.GA22345@vino.hallyn.com> (raw)
In-Reply-To: <1180486369.11715.69.camel@localhost.localdomain>

Quoting Matt Helsley (matthltc@us.ibm.com):
> This patch avoids holding the mmap semaphore while walking VMAs in response to
> programs which read or follow the /proc/<pid|self>/exe symlink. This also allows us
> to merge mmu and nommu proc_exe_link() functions. The costs are holding a separate
> reference to the executable file stored in the task struct and increased code in
> fork, exec, and exit paths.
> 
> Signed-off-by: Matt Helsley <matthltc@us.ibm.com>
> ---
> 
> Compiled and passed simple tests for regressions when patched against a 2.6.20
> and 2.6.22-rc2-mm1 kernel.
> 
>  fs/exec.c             |    5 +++--
>  fs/proc/base.c        |   20 ++++++++++++++++++++
>  fs/proc/internal.h    |    1 -
>  fs/proc/task_mmu.c    |   34 ----------------------------------
>  fs/proc/task_nommu.c  |   34 ----------------------------------
>  include/linux/sched.h |    1 +
>  kernel/exit.c         |    2 ++
>  kernel/fork.c         |   10 +++++++++-
>  8 files changed, 35 insertions(+), 72 deletions(-)
> 
> Index: linux-2.6.22-rc2-mm1/include/linux/sched.h
> ===================================================================
> --- linux-2.6.22-rc2-mm1.orig/include/linux/sched.h
> +++ linux-2.6.22-rc2-mm1/include/linux/sched.h
> @@ -988,10 +988,11 @@ struct task_struct {
>  	int oomkilladj; /* OOM kill score adjustment (bit shift). */
>  	char comm[TASK_COMM_LEN]; /* executable name excluding path
>  				     - access with [gs]et_task_comm (which lock
>  				       it with task_lock())
>  				     - initialized normally by flush_old_exec */
> +	struct file *exe_file;
>  /* file system info */
>  	int link_count, total_link_count;
>  #ifdef CONFIG_SYSVIPC
>  /* ipc stuff */
>  	struct sysv_sem sysvsem;
> Index: linux-2.6.22-rc2-mm1/fs/exec.c
> ===================================================================
> --- linux-2.6.22-rc2-mm1.orig/fs/exec.c
> +++ linux-2.6.22-rc2-mm1/fs/exec.c
> @@ -1106,12 +1106,13 @@ int search_binary_handler(struct linux_b
>  			read_unlock(&binfmt_lock);
>  			retval = fn(bprm, regs);
>  			if (retval >= 0) {
>  				put_binfmt(fmt);
>  				allow_write_access(bprm->file);
> -				if (bprm->file)
> -					fput(bprm->file);
> +				if (current->exe_file)
> +					fput(current->exe_file);
> +				current->exe_file = bprm->file;
>  				bprm->file = NULL;
>  				current->did_exec = 1;
>  				proc_exec_connector(current);
>  				return retval;
>  			}
> Index: linux-2.6.22-rc2-mm1/fs/proc/base.c
> ===================================================================
> --- linux-2.6.22-rc2-mm1.orig/fs/proc/base.c
> +++ linux-2.6.22-rc2-mm1/fs/proc/base.c
> @@ -951,10 +951,30 @@ const struct file_operations proc_pid_sc
>  	.write		= sched_write,
>  	.llseek		= seq_lseek,
>  	.release	= seq_release,
>  };
>  
> +static int proc_exe_link(struct inode *inode, struct dentry **dentry,
> +    			 struct vfsmount **mnt)
> +{
> +	int error;
> +	struct task_struct *task;
> +
> +	task = get_proc_task(inode);
> +	if (!task)
> +		return -ENOENT;
> +	error = -ENOSYS;
> +	if (!task->exe_file)
> +		goto out;
> +	*mnt = mntget(task->exe_file->f_path.mnt);
> +	*dentry = dget(task->exe_file->f_path.dentry);
> +	error = 0;
> +out:
> +	put_task_struct(task);
> +	return error;
> +}
> +
>  static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
>  {
>  	struct inode *inode = dentry->d_inode;
>  	int error = -EACCES;
>  
> Index: linux-2.6.22-rc2-mm1/kernel/exit.c
> ===================================================================
> --- linux-2.6.22-rc2-mm1.orig/kernel/exit.c
> +++ linux-2.6.22-rc2-mm1/kernel/exit.c
> @@ -924,10 +924,12 @@ fastcall void do_exit(long code)
>  	if (unlikely(tsk->audit_context))
>  		audit_free(tsk);
>  
>  	taskstats_exit(tsk, group_dead);
>  
> +	if (tsk->exe_file)
> +		fput(tsk->exe_file);

Hi,

just taking a cursory look so I may be missing something, but doesn't
this leave the possibility that right here, with tsk->exe_file being
put, another task would try to look at tsk's /proc/tsk->pid/exe?

thanks,
-serge


>  	exit_mm(tsk);
>  
>  	if (group_dead)
>  		acct_process();
>  	exit_sem(tsk);
> Index: linux-2.6.22-rc2-mm1/kernel/fork.c
> ===================================================================
> --- linux-2.6.22-rc2-mm1.orig/kernel/fork.c
> +++ linux-2.6.22-rc2-mm1/kernel/fork.c
> @@ -1163,10 +1163,13 @@ static struct task_struct *copy_process(
>  
>  	/* ok, now we should be set up.. */
>  	p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
>  	p->pdeath_signal = 0;
>  	p->exit_state = 0;
> +	p->exe_file = current->exe_file;
> +	if (p->exe_file)
> +		get_file(p->exe_file);
>  
>  	/*
>  	 * Ok, make it visible to the rest of the system.
>  	 * We dont wake it up yet.
>  	 */
> @@ -1218,11 +1221,11 @@ static struct task_struct *copy_process(
>   	recalc_sigpending();
>  	if (signal_pending(current)) {
>  		spin_unlock(&current->sighand->siglock);
>  		write_unlock_irq(&tasklist_lock);
>  		retval = -ERESTARTNOINTR;
> -		goto bad_fork_cleanup_namespaces;
> +		goto bad_fork_cleanup_exe_file;
>  	}
>  
>  	if (clone_flags & CLONE_THREAD) {
>  		p->group_leader = current->group_leader;
>  		list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
> @@ -1274,10 +1277,15 @@ static struct task_struct *copy_process(
>  		put_user(p->pid, parent_tidptr);
>  
>  	proc_fork_connector(p);
>  	return p;
>  
> +bad_fork_cleanup_exe_file:
> +	if (p->exe_file) {
> +		fput(p->exe_file);
> +		p->exe_file = NULL;
> +	}
>  bad_fork_cleanup_namespaces:
>  	exit_task_namespaces(p);
>  bad_fork_cleanup_keys:
>  	exit_keys(p);
>  bad_fork_cleanup_mm:
> Index: linux-2.6.22-rc2-mm1/fs/proc/task_mmu.c
> ===================================================================
> --- linux-2.6.22-rc2-mm1.orig/fs/proc/task_mmu.c
> +++ linux-2.6.22-rc2-mm1/fs/proc/task_mmu.c
> @@ -71,44 +71,10 @@ int task_statm(struct mm_struct *mm, int
>  	*data = mm->total_vm - mm->shared_vm;
>  	*resident = *shared + get_mm_counter(mm, anon_rss);
>  	return mm->total_vm;
>  }
>  
> -int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
> -{
> -	struct vm_area_struct * vma;
> -	int result = -ENOENT;
> -	struct task_struct *task = get_proc_task(inode);
> -	struct mm_struct * mm = NULL;
> -
> -	if (task) {
> -		mm = get_task_mm(task);
> -		put_task_struct(task);
> -	}
> -	if (!mm)
> -		goto out;
> -	down_read(&mm->mmap_sem);
> -
> -	vma = mm->mmap;
> -	while (vma) {
> -		if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file)
> -			break;
> -		vma = vma->vm_next;
> -	}
> -
> -	if (vma) {
> -		*mnt = mntget(vma->vm_file->f_path.mnt);
> -		*dentry = dget(vma->vm_file->f_path.dentry);
> -		result = 0;
> -	}
> -
> -	up_read(&mm->mmap_sem);
> -	mmput(mm);
> -out:
> -	return result;
> -}
> -
>  static void pad_len_spaces(struct seq_file *m, int len)
>  {
>  	len = 25 + sizeof(void*) * 6 - len;
>  	if (len < 1)
>  		len = 1;
> Index: linux-2.6.22-rc2-mm1/fs/proc/task_nommu.c
> ===================================================================
> --- linux-2.6.22-rc2-mm1.orig/fs/proc/task_nommu.c
> +++ linux-2.6.22-rc2-mm1/fs/proc/task_nommu.c
> @@ -102,44 +102,10 @@ int task_statm(struct mm_struct *mm, int
>  	up_read(&mm->mmap_sem);
>  	*resident = size;
>  	return size;
>  }
>  
> -int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
> -{
> -	struct vm_list_struct *vml;
> -	struct vm_area_struct *vma;
> -	struct task_struct *task = get_proc_task(inode);
> -	struct mm_struct *mm = get_task_mm(task);
> -	int result = -ENOENT;
> -
> -	if (!mm)
> -		goto out;
> -	down_read(&mm->mmap_sem);
> -
> -	vml = mm->context.vmlist;
> -	vma = NULL;
> -	while (vml) {
> -		if ((vml->vma->vm_flags & VM_EXECUTABLE) && vml->vma->vm_file) {
> -			vma = vml->vma;
> -			break;
> -		}
> -		vml = vml->next;
> -	}
> -
> -	if (vma) {
> -		*mnt = mntget(vma->vm_file->f_path.mnt);
> -		*dentry = dget(vma->vm_file->f_path.dentry);
> -		result = 0;
> -	}
> -
> -	up_read(&mm->mmap_sem);
> -	mmput(mm);
> -out:
> -	return result;
> -}
> -
>  /*
>   * display mapping lines for a particular process's /proc/pid/maps
>   */
>  static int show_map(struct seq_file *m, void *_vml)
>  {
> Index: linux-2.6.22-rc2-mm1/fs/proc/internal.h
> ===================================================================
> --- linux-2.6.22-rc2-mm1.orig/fs/proc/internal.h
> +++ linux-2.6.22-rc2-mm1/fs/proc/internal.h
> @@ -38,11 +38,10 @@ extern int nommu_vma_show(struct seq_fil
>  #endif
>  
>  extern int maps_protect;
>  
>  extern void create_seq_entry(char *name, mode_t mode, const struct file_operations *f);
> -extern int proc_exe_link(struct inode *, struct dentry **, struct vfsmount **);
>  extern int proc_tid_stat(struct task_struct *,  char *);
>  extern int proc_tgid_stat(struct task_struct *, char *);
>  extern int proc_pid_status(struct task_struct *, char *);
>  extern int proc_pid_statm(struct task_struct *, char *);
>  extern loff_t mem_lseek(struct file * file, loff_t offset, int orig);
> 
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2007-05-30 18:09 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-05-30  0:52 Matt Helsley
2007-05-30 18:09 ` Serge E. Hallyn [this message]
2007-05-31  6:01   ` Chris Wright
2007-06-01  1:13     ` Matt Helsley
2007-05-31 17:56   ` Matt Helsley
2007-06-01 22:31     ` Serge E. Hallyn
2007-06-02  0:42       ` Matt Helsley
2007-06-02  0:46 Matt Helsley
2007-06-06 22:21 ` Serge E. Hallyn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070530180923.GA22345@vino.hallyn.com \
    --to=serge@hallyn.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=matthltc@us.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox