* [patch] mm: madvise avoid mmap_sem write
@ 2007-04-05 9:01 Nick Piggin
2007-04-05 11:53 ` Hugh Dickins
0 siblings, 1 reply; 2+ messages in thread
From: Nick Piggin @ 2007-04-05 9:01 UTC (permalink / raw)
To: Linux Memory Management List
Here is a newer version of the patch.
--
Avoid down_write of the mmap_sem in madvise when we can help it.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Index: linux-2.6/mm/madvise.c
===================================================================
--- linux-2.6.orig/mm/madvise.c
+++ linux-2.6/mm/madvise.c
@@ -12,6 +12,24 @@
#include <linux/hugetlb.h>
/*
+ * Any behaviour which results in changes to the vma->vm_flags needs to
+ * take mmap_sem for writing. Others, which simply traverse vmas, need
+ * to only take it for reading.
+ */
+static int madvise_need_mmap_write(int behavior)
+{
+ switch (behavior) {
+ case MADV_REMOVE:
+ case MADV_WILLNEED:
+ case MADV_DONTNEED:
+ return 0;
+ default:
+ /* be safe, default to 1. list exceptions explicitly */
+ return 1;
+ }
+}
+
+/*
* We can potentially split a vm area into separate
* areas, each area with its own behavior.
*/
@@ -183,9 +201,9 @@ static long madvise_remove(struct vm_are
+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
/* vmtruncate_range needs to take i_mutex and i_alloc_sem */
- up_write(&current->mm->mmap_sem);
+ up_read(&current->mm->mmap_sem);
error = vmtruncate_range(mapping->host, offset, endoff);
- down_write(&current->mm->mmap_sem);
+ down_read(&current->mm->mmap_sem);
return error;
}
@@ -270,7 +288,10 @@ asmlinkage long sys_madvise(unsigned lon
int error = -EINVAL;
size_t len;
- down_write(&current->mm->mmap_sem);
+ if (madvise_need_mmap_write(behavior))
+ down_write(&current->mm->mmap_sem);
+ else
+ down_read(&current->mm->mmap_sem);
if (start & ~PAGE_MASK)
goto out;
@@ -332,6 +353,10 @@ asmlinkage long sys_madvise(unsigned lon
vma = find_vma(current->mm, start);
}
out:
- up_write(&current->mm->mmap_sem);
+ if (madvise_need_mmap_write(behavior))
+ up_write(&current->mm->mmap_sem);
+ else
+ up_read(&current->mm->mmap_sem);
+
return error;
}
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [patch] mm: madvise avoid mmap_sem write
2007-04-05 9:01 [patch] mm: madvise avoid mmap_sem write Nick Piggin
@ 2007-04-05 11:53 ` Hugh Dickins
0 siblings, 0 replies; 2+ messages in thread
From: Hugh Dickins @ 2007-04-05 11:53 UTC (permalink / raw)
To: Nick Piggin; +Cc: Linux Memory Management List
On Thu, 5 Apr 2007, Nick Piggin wrote:
> Here is a newer version of the patch.
That's very nice: yes, better defaulted the safe way round.
>
> --
>
> Avoid down_write of the mmap_sem in madvise when we can help it.
>
> Signed-off-by: Nick Piggin <npiggin@suse.de>
Acked-by: Hugh Dickins <hugh@veritas.com>
>
> Index: linux-2.6/mm/madvise.c
> ===================================================================
> --- linux-2.6.orig/mm/madvise.c
> +++ linux-2.6/mm/madvise.c
> @@ -12,6 +12,24 @@
> #include <linux/hugetlb.h>
>
> /*
> + * Any behaviour which results in changes to the vma->vm_flags needs to
> + * take mmap_sem for writing. Others, which simply traverse vmas, need
> + * to only take it for reading.
> + */
> +static int madvise_need_mmap_write(int behavior)
> +{
> + switch (behavior) {
> + case MADV_REMOVE:
> + case MADV_WILLNEED:
> + case MADV_DONTNEED:
> + return 0;
> + default:
> + /* be safe, default to 1. list exceptions explicitly */
> + return 1;
> + }
> +}
> +
> +/*
> * We can potentially split a vm area into separate
> * areas, each area with its own behavior.
> */
> @@ -183,9 +201,9 @@ static long madvise_remove(struct vm_are
> + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
>
> /* vmtruncate_range needs to take i_mutex and i_alloc_sem */
> - up_write(&current->mm->mmap_sem);
> + up_read(&current->mm->mmap_sem);
> error = vmtruncate_range(mapping->host, offset, endoff);
> - down_write(&current->mm->mmap_sem);
> + down_read(&current->mm->mmap_sem);
> return error;
> }
>
> @@ -270,7 +288,10 @@ asmlinkage long sys_madvise(unsigned lon
> int error = -EINVAL;
> size_t len;
>
> - down_write(&current->mm->mmap_sem);
> + if (madvise_need_mmap_write(behavior))
> + down_write(&current->mm->mmap_sem);
> + else
> + down_read(&current->mm->mmap_sem);
>
> if (start & ~PAGE_MASK)
> goto out;
> @@ -332,6 +353,10 @@ asmlinkage long sys_madvise(unsigned lon
> vma = find_vma(current->mm, start);
> }
> out:
> - up_write(&current->mm->mmap_sem);
> + if (madvise_need_mmap_write(behavior))
> + up_write(&current->mm->mmap_sem);
> + else
> + up_read(&current->mm->mmap_sem);
> +
> return error;
> }
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2007-04-05 11:53 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-04-05 9:01 [patch] mm: madvise avoid mmap_sem write Nick Piggin
2007-04-05 11:53 ` Hugh Dickins
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox