linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [patch] mm: madvise avoid mmap_sem write
@ 2007-04-05  9:01 Nick Piggin
  2007-04-05 11:53 ` Hugh Dickins
  0 siblings, 1 reply; 2+ messages in thread
From: Nick Piggin @ 2007-04-05  9:01 UTC (permalink / raw)
  To: Linux Memory Management List

Here is a newer version of the patch.

--

Avoid down_write of the mmap_sem in madvise when we can help it.

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/mm/madvise.c
===================================================================
--- linux-2.6.orig/mm/madvise.c
+++ linux-2.6/mm/madvise.c
@@ -12,6 +12,24 @@
 #include <linux/hugetlb.h>
 
 /*
+ * Any behaviour which results in changes to the vma->vm_flags needs to
+ * take mmap_sem for writing. Others, which simply traverse vmas, only
+ * need to take it for reading.
+ */
+static int madvise_need_mmap_write(int behavior)
+{
+	switch (behavior) {
+	case MADV_REMOVE:
+	case MADV_WILLNEED:
+	case MADV_DONTNEED:
+		return 0;
+	default:
+		/* be safe: default to 1 (write lock); list read-only exceptions explicitly */
+		return 1;
+	}
+}
+
+/*
  * We can potentially split a vm area into separate
  * areas, each area with its own behavior.
  */
@@ -183,9 +201,9 @@ static long madvise_remove(struct vm_are
 			+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
 
 	/* vmtruncate_range needs to take i_mutex and i_alloc_sem */
-	up_write(&current->mm->mmap_sem);
+	up_read(&current->mm->mmap_sem);
 	error = vmtruncate_range(mapping->host, offset, endoff);
-	down_write(&current->mm->mmap_sem);
+	down_read(&current->mm->mmap_sem);
 	return error;
 }
 
@@ -270,7 +288,10 @@ asmlinkage long sys_madvise(unsigned lon
 	int error = -EINVAL;
 	size_t len;
 
-	down_write(&current->mm->mmap_sem);
+	if (madvise_need_mmap_write(behavior))
+		down_write(&current->mm->mmap_sem);
+	else
+		down_read(&current->mm->mmap_sem);
 
 	if (start & ~PAGE_MASK)
 		goto out;
@@ -332,6 +353,10 @@ asmlinkage long sys_madvise(unsigned lon
 			vma = find_vma(current->mm, start);
 	}
 out:
-	up_write(&current->mm->mmap_sem);
+	if (madvise_need_mmap_write(behavior))
+		up_write(&current->mm->mmap_sem);
+	else
+		up_read(&current->mm->mmap_sem);
+
 	return error;
 }

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [patch] mm: madvise avoid mmap_sem write
  2007-04-05  9:01 [patch] mm: madvise avoid mmap_sem write Nick Piggin
@ 2007-04-05 11:53 ` Hugh Dickins
  0 siblings, 0 replies; 2+ messages in thread
From: Hugh Dickins @ 2007-04-05 11:53 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Linux Memory Management List

On Thu, 5 Apr 2007, Nick Piggin wrote:
> Here is a newer version of the patch.

That's very nice: yes, better defaulted the safe way round.

> 
> --
> 
> Avoid down_write of the mmap_sem in madvise when we can help it.
> 
> Signed-off-by: Nick Piggin <npiggin@suse.de>

Acked-by: Hugh Dickins <hugh@veritas.com>

> 
> Index: linux-2.6/mm/madvise.c
> ===================================================================
> --- linux-2.6.orig/mm/madvise.c
> +++ linux-2.6/mm/madvise.c
> @@ -12,6 +12,24 @@
>  #include <linux/hugetlb.h>
>  
>  /*
> + * Any behaviour which results in changes to the vma->vm_flags needs to
> + * take mmap_sem for writing. Others, which simply traverse vmas, need
> + * to only take it for reading.
> + */
> +static int madvise_need_mmap_write(int behavior)
> +{
> +	switch (behavior) {
> +	case MADV_REMOVE:
> +	case MADV_WILLNEED:
> +	case MADV_DONTNEED:
> +		return 0;
> +	default:
> +		/* be safe, default to 1. list exceptions explicitly */
> +		return 1;
> +	}
> +}
> +
> +/*
>   * We can potentially split a vm area into separate
>   * areas, each area with its own behavior.
>   */
> @@ -183,9 +201,9 @@ static long madvise_remove(struct vm_are
>  			+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
>  
>  	/* vmtruncate_range needs to take i_mutex and i_alloc_sem */
> -	up_write(&current->mm->mmap_sem);
> +	up_read(&current->mm->mmap_sem);
>  	error = vmtruncate_range(mapping->host, offset, endoff);
> -	down_write(&current->mm->mmap_sem);
> +	down_read(&current->mm->mmap_sem);
>  	return error;
>  }
>  
> @@ -270,7 +288,10 @@ asmlinkage long sys_madvise(unsigned lon
>  	int error = -EINVAL;
>  	size_t len;
>  
> -	down_write(&current->mm->mmap_sem);
> +	if (madvise_need_mmap_write(behavior))
> +		down_write(&current->mm->mmap_sem);
> +	else
> +		down_read(&current->mm->mmap_sem);
>  
>  	if (start & ~PAGE_MASK)
>  		goto out;
> @@ -332,6 +353,10 @@ asmlinkage long sys_madvise(unsigned lon
>  			vma = find_vma(current->mm, start);
>  	}
>  out:
> -	up_write(&current->mm->mmap_sem);
> +	if (madvise_need_mmap_write(behavior))
> +		up_write(&current->mm->mmap_sem);
> +	else
> +		up_read(&current->mm->mmap_sem);
> +
>  	return error;
>  }

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2007-04-05 11:53 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-04-05  9:01 [patch] mm: madvise avoid mmap_sem write Nick Piggin
2007-04-05 11:53 ` Hugh Dickins

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox