* [PATCH] Remove page_table_lock from vma manipulations
From: Dave McCracken @ 2003-06-04 22:30 UTC (permalink / raw)
To: Linux Memory Management, Linux Kernel
[-- Attachment #1: Type: text/plain, Size: 550 bytes --]
After more careful consideration, I don't see any reason why
page_table_lock is necessary for dealing with vmas. I found one spot in
swapoff, but it was easily changed to mmap_sem. I've beaten on this code,
and mjb has beaten on it too, with no problems. Here's the patch to
remove it. Feel free to poke holes in it.
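For reference, the locking rule this relies on, sketched with userspace
stand-ins for the kernel primitives (an illustrative sketch only, not
kernel code): mmap_sem by itself keeps the vma list stable, so a
traversal needs only down_read(), and page_table_lock goes back to
guarding just the page tables.

/* Illustrative sketch; pthread stand-ins for the kernel rwsem. */
#include <pthread.h>

struct vm_area_struct {
	unsigned long vm_start, vm_end;
	struct vm_area_struct *vm_next;
};

struct mm_struct {
	struct vm_area_struct *mmap;	/* head of the vma list */
	pthread_rwlock_t mmap_sem;	/* stands in for the kernel rwsem */
};

/* Reader side, as unuse_process() becomes in the patch: the read
 * lock alone pins the vma list. */
void walk_vmas(struct mm_struct *mm)
{
	struct vm_area_struct *vma;

	pthread_rwlock_rdlock(&mm->mmap_sem);	/* down_read(&mm->mmap_sem) */
	for (vma = mm->mmap; vma; vma = vma->vm_next)
		;				/* visit each vma safely */
	pthread_rwlock_unlock(&mm->mmap_sem);	/* up_read(&mm->mmap_sem) */
}

/* Writer side, as in vma_link()/do_munmap(): the semaphore is held
 * exclusively, so no spinlock is needed for the list itself. */
void link_vma(struct mm_struct *mm, struct vm_area_struct *new)
{
	pthread_rwlock_wrlock(&mm->mmap_sem);	/* down_write(&mm->mmap_sem) */
	new->vm_next = mm->mmap;
	mm->mmap = new;
	pthread_rwlock_unlock(&mm->mmap_sem);	/* up_write(&mm->mmap_sem) */
}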
Dave McCracken
======================================================================
Dave McCracken IBM Linux Base Kernel Team 1-512-838-3059
dmccr@us.ibm.com T/L 678-3059
[-- Attachment #2: nolock-2.5.70-mm4-1.diff --]
[-- Type: text/plain, Size: 4710 bytes --]
--- 2.5.70-mm4/./mm/swapfile.c 2003-05-26 20:00:25.000000000 -0500
+++ 2.5.70-mm4-nolock/./mm/swapfile.c 2003-06-04 16:02:49.000000000 -0500
@@ -493,6 +493,7 @@ static int unuse_process(struct mm_struc
/*
* Go through process' page directory.
*/
+ down_read(&mm->mmap_sem);
spin_lock(&mm->page_table_lock);
for (vma = mm->mmap; vma; vma = vma->vm_next) {
pgd_t * pgd = pgd_offset(mm, vma->vm_start);
@@ -500,6 +501,7 @@ static int unuse_process(struct mm_struc
break;
}
spin_unlock(&mm->page_table_lock);
+ up_read(&mm->mmap_sem);
pte_chain_free(pte_chain);
return 0;
}
--- 2.5.70-mm4/./mm/mmap.c 2003-06-04 15:47:24.000000000 -0500
+++ 2.5.70-mm4-nolock/./mm/mmap.c 2003-06-04 16:02:49.000000000 -0500
@@ -346,9 +346,7 @@ static void vma_link(struct mm_struct *m
if (mapping)
down(&mapping->i_shared_sem);
- spin_lock(&mm->page_table_lock);
__vma_link(mm, vma, prev, rb_link, rb_parent);
- spin_unlock(&mm->page_table_lock);
if (mapping)
up(&mapping->i_shared_sem);
@@ -429,7 +427,6 @@ static int vma_merge(struct mm_struct *m
unsigned long end, unsigned long vm_flags,
struct file *file, unsigned long pgoff)
{
- spinlock_t * lock = &mm->page_table_lock;
/*
* We later require that vma->vm_flags == vm_flags, so this tests
@@ -458,7 +455,6 @@ static int vma_merge(struct mm_struct *m
down(&inode->i_mapping->i_shared_sem);
need_up = 1;
}
- spin_lock(lock);
prev->vm_end = end;
/*
@@ -471,7 +467,6 @@ static int vma_merge(struct mm_struct *m
prev->vm_end = next->vm_end;
__vma_unlink(mm, next, prev);
__remove_shared_vm_struct(next, inode);
- spin_unlock(lock);
if (need_up)
up(&inode->i_mapping->i_shared_sem);
if (file)
@@ -481,7 +476,6 @@ static int vma_merge(struct mm_struct *m
kmem_cache_free(vm_area_cachep, next);
return 1;
}
- spin_unlock(lock);
if (need_up)
up(&inode->i_mapping->i_shared_sem);
return 1;
@@ -497,10 +491,8 @@ static int vma_merge(struct mm_struct *m
pgoff, (end - addr) >> PAGE_SHIFT))
return 0;
if (end == prev->vm_start) {
- spin_lock(lock);
prev->vm_start = addr;
prev->vm_pgoff -= (end - addr) >> PAGE_SHIFT;
- spin_unlock(lock);
return 1;
}
}
@@ -945,19 +937,16 @@ int expand_stack(struct vm_area_struct *
*/
address += 4 + PAGE_SIZE - 1;
address &= PAGE_MASK;
- spin_lock(&vma->vm_mm->page_table_lock);
grow = (address - vma->vm_end) >> PAGE_SHIFT;
/* Overcommit.. */
if (!vm_enough_memory(grow)) {
- spin_unlock(&vma->vm_mm->page_table_lock);
return -ENOMEM;
}
if (address - vma->vm_start > current->rlim[RLIMIT_STACK].rlim_cur ||
((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) >
current->rlim[RLIMIT_AS].rlim_cur) {
- spin_unlock(&vma->vm_mm->page_table_lock);
vm_unacct_memory(grow);
return -ENOMEM;
}
@@ -965,7 +954,6 @@ int expand_stack(struct vm_area_struct *
vma->vm_mm->total_vm += grow;
if (vma->vm_flags & VM_LOCKED)
vma->vm_mm->locked_vm += grow;
- spin_unlock(&vma->vm_mm->page_table_lock);
return 0;
}
@@ -999,19 +987,16 @@ int expand_stack(struct vm_area_struct *
* the spinlock only before relocating the vma range ourself.
*/
address &= PAGE_MASK;
- spin_lock(&vma->vm_mm->page_table_lock);
grow = (vma->vm_start - address) >> PAGE_SHIFT;
/* Overcommit.. */
if (!vm_enough_memory(grow)) {
- spin_unlock(&vma->vm_mm->page_table_lock);
return -ENOMEM;
}
if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur ||
((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) >
current->rlim[RLIMIT_AS].rlim_cur) {
- spin_unlock(&vma->vm_mm->page_table_lock);
vm_unacct_memory(grow);
return -ENOMEM;
}
@@ -1020,7 +1005,6 @@ int expand_stack(struct vm_area_struct *
vma->vm_mm->total_vm += grow;
if (vma->vm_flags & VM_LOCKED)
vma->vm_mm->locked_vm += grow;
- spin_unlock(&vma->vm_mm->page_table_lock);
return 0;
}
@@ -1183,8 +1167,6 @@ static void unmap_region(struct mm_struc
/*
* Create a list of vma's touched by the unmap, removing them from the mm's
* vma list as we go..
- *
- * Called with the page_table_lock held.
*/
static void
detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -1308,8 +1290,8 @@ int do_munmap(struct mm_struct *mm, unsi
/*
* Remove the vma's, and unmap the actual pages
*/
- spin_lock(&mm->page_table_lock);
detach_vmas_to_be_unmapped(mm, mpnt, prev, end);
+ spin_lock(&mm->page_table_lock);
unmap_region(mm, mpnt, prev, start, end);
spin_unlock(&mm->page_table_lock);
* Re: [PATCH] Remove page_table_lock from vma manipulations
From: William Lee Irwin III @ 2003-06-04 22:37 UTC (permalink / raw)
To: Dave McCracken; +Cc: Linux Memory Management, Linux Kernel
On Wed, Jun 04, 2003 at 05:30:25PM -0500, Dave McCracken wrote:
> After more careful consideration, I don't see any reason why
> page_table_lock is necessary for dealing with vmas. I found one spot in
> swapoff, but it was easily changed to mmap_sem. I've beaten on this code,
> and mjb has beaten on it too, with no problems. Here's the patch to
> remove it. Feel free to poke holes in it.
shrink_list() calls try_to_unmap() under pte_chain_lock(page), and
hence try_to_unmap() cannot sleep. Furthermore, try_to_unmap() calls
find_vma() under the sole protection of
spin_trylock(&mm->page_table_lock), which I don't see changed to a
down_read_trylock(&mm->mmap_sem) here.
Hence, this is racy.
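To make that concrete, here is a userspace analogue of the race (purely
illustrative, with pthread stand-ins; none of this is from the kernel):
the traverser takes only a stand-in page_table_lock while the list
writer takes only a stand-in mmap_sem, so the two critical sections
never exclude each other and the walk can chase a just-freed pointer.

/* cc -pthread race.c -- illustrative analogue, not kernel code. */
#include <pthread.h>
#include <stdlib.h>

struct vma { struct vma *next; };

static struct vma *mmap_head;
static pthread_mutex_t ptl = PTHREAD_MUTEX_INITIALIZER;	  /* "page_table_lock" */
static pthread_rwlock_t mmap_sem = PTHREAD_RWLOCK_INITIALIZER; /* "mmap_sem" */

/* try_to_unmap_one() analogue: find_vma()'s walk, under the wrong lock. */
static void *reclaim(void *unused)
{
	(void)unused;
	for (int i = 0; i < 1000000; i++) {
		pthread_mutex_lock(&ptl);
		for (struct vma *v = mmap_head; v; v = v->next)
			;	/* may dereference a node just freed below */
		pthread_mutex_unlock(&ptl);
	}
	return NULL;
}

/* do_munmap()/vma_link() analogue: rewrites the list under mmap_sem only. */
static void *unmapper(void *unused)
{
	(void)unused;
	for (int i = 0; i < 1000000; i++) {
		struct vma *v = malloc(sizeof(*v));

		pthread_rwlock_wrlock(&mmap_sem);
		v->next = mmap_head;
		mmap_head = v;			/* link */
		mmap_head = v->next;		/* and unlink again */
		pthread_rwlock_unlock(&mmap_sem);
		free(v);
	}
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, reclaim, NULL);
	pthread_create(&b, NULL, unmapper, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}

Building with cc -pthread -fsanitize=thread flags the unsynchronized
accesses to mmap_head immediately, since the two threads share no lock.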
-- wli
* Re: [PATCH] Remove page_table_lock from vma manipulations
From: Andrew Morton @ 2003-06-04 22:38 UTC (permalink / raw)
To: Dave McCracken; +Cc: Linux Memory Management, Linux Kernel
Dave McCracken wrote:
>
> After more careful consideration, I don't see any reason why
> page_table_lock is necessary for dealing with vmas. I found one spot in
> swapoff, but it was easily changed to mmap_sem.
What keeps the VMA tree consistent when try_to_unmap_one()
runs find_vma()?
* Re: [PATCH] Remove page_table_lock from vma manipulations
From: Dave McCracken @ 2003-06-04 22:58 UTC (permalink / raw)
To: Andrew Morton; +Cc: Linux Memory Management, Linux Kernel
[-- Attachment #1: Type: text/plain, Size: 688 bytes --]
--On Wednesday, June 04, 2003 15:38:09 -0700 Andrew Morton <akpm@digeo.com>
wrote:
>> After more careful consideration, I don't see any reason why
>> page_table_lock is necessary for dealing with vmas. I found one spot in
>> swapoff, but it was easily changed to mmap_sem.
>
> What keeps the VMA tree consistent when try_to_unmap_one()
> runs find_vma()?
Gah. I don't know how I convinced myself that code was safe. It's easily
fixed. How does this one look?
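The shape of the fix in the attached patch, reduced to a stand-in
sketch (pthread primitives in place of the kernel's, simplified control
flow): a path that cannot sleep may only try the semaphore, it must
take it before the spinlock, and it must unwind in reverse order.

/* Stand-in sketch of the v2 try_to_unmap_one() locking shape --
 * illustrative, not the kernel source. */
#include <pthread.h>

#define SWAP_SUCCESS	0
#define SWAP_AGAIN	1

struct mm_struct {
	pthread_rwlock_t mmap_sem;
	pthread_mutex_t page_table_lock;
};

int try_to_unmap_one_sketch(struct mm_struct *mm)
{
	int ret = SWAP_SUCCESS;

	/* Cannot sleep under pte_chain_lock(), so only trylock... */
	if (pthread_rwlock_tryrdlock(&mm->mmap_sem))
		return SWAP_AGAIN;	/* reclaim retries the page later */
	/* ...and the semaphore comes first, the spinlock second. */
	if (pthread_mutex_trylock(&mm->page_table_lock)) {
		ret = SWAP_AGAIN;
		goto out_unlock_sem;
	}

	/* find_vma() walk and pte teardown happen here, with the vma
	 * list pinned by mmap_sem and the page tables by the lock. */

	pthread_mutex_unlock(&mm->page_table_lock);
out_unlock_sem:
	pthread_rwlock_unlock(&mm->mmap_sem);
	return ret;
}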
Dave
======================================================================
Dave McCracken IBM Linux Base Kernel Team 1-512-838-3059
dmccr@us.ibm.com T/L 678-3059
[-- Attachment #2: nolock-2.5.70-mm4-2.diff --]
[-- Type: text/plain, Size: 5656 bytes --]
--- 2.5.70-mm4/./mm/swapfile.c 2003-05-26 20:00:25.000000000 -0500
+++ 2.5.70-mm4-nolock/./mm/swapfile.c 2003-06-04 16:02:49.000000000 -0500
@@ -493,6 +493,7 @@ static int unuse_process(struct mm_struc
/*
* Go through process' page directory.
*/
+ down_read(&mm->mmap_sem);
spin_lock(&mm->page_table_lock);
for (vma = mm->mmap; vma; vma = vma->vm_next) {
pgd_t * pgd = pgd_offset(mm, vma->vm_start);
@@ -500,6 +501,7 @@ static int unuse_process(struct mm_struc
break;
}
spin_unlock(&mm->page_table_lock);
+ up_read(&mm->mmap_sem);
pte_chain_free(pte_chain);
return 0;
}
--- 2.5.70-mm4/./mm/mmap.c 2003-06-04 15:47:24.000000000 -0500
+++ 2.5.70-mm4-nolock/./mm/mmap.c 2003-06-04 16:02:49.000000000 -0500
@@ -346,9 +346,7 @@ static void vma_link(struct mm_struct *m
if (mapping)
down(&mapping->i_shared_sem);
- spin_lock(&mm->page_table_lock);
__vma_link(mm, vma, prev, rb_link, rb_parent);
- spin_unlock(&mm->page_table_lock);
if (mapping)
up(&mapping->i_shared_sem);
@@ -429,7 +427,6 @@ static int vma_merge(struct mm_struct *m
unsigned long end, unsigned long vm_flags,
struct file *file, unsigned long pgoff)
{
- spinlock_t * lock = &mm->page_table_lock;
/*
* We later require that vma->vm_flags == vm_flags, so this tests
@@ -458,7 +455,6 @@ static int vma_merge(struct mm_struct *m
down(&inode->i_mapping->i_shared_sem);
need_up = 1;
}
- spin_lock(lock);
prev->vm_end = end;
/*
@@ -471,7 +467,6 @@ static int vma_merge(struct mm_struct *m
prev->vm_end = next->vm_end;
__vma_unlink(mm, next, prev);
__remove_shared_vm_struct(next, inode);
- spin_unlock(lock);
if (need_up)
up(&inode->i_mapping->i_shared_sem);
if (file)
@@ -481,7 +476,6 @@ static int vma_merge(struct mm_struct *m
kmem_cache_free(vm_area_cachep, next);
return 1;
}
- spin_unlock(lock);
if (need_up)
up(&inode->i_mapping->i_shared_sem);
return 1;
@@ -497,10 +491,8 @@ static int vma_merge(struct mm_struct *m
pgoff, (end - addr) >> PAGE_SHIFT))
return 0;
if (end == prev->vm_start) {
- spin_lock(lock);
prev->vm_start = addr;
prev->vm_pgoff -= (end - addr) >> PAGE_SHIFT;
- spin_unlock(lock);
return 1;
}
}
@@ -945,19 +937,16 @@ int expand_stack(struct vm_area_struct *
*/
address += 4 + PAGE_SIZE - 1;
address &= PAGE_MASK;
- spin_lock(&vma->vm_mm->page_table_lock);
grow = (address - vma->vm_end) >> PAGE_SHIFT;
/* Overcommit.. */
if (!vm_enough_memory(grow)) {
- spin_unlock(&vma->vm_mm->page_table_lock);
return -ENOMEM;
}
if (address - vma->vm_start > current->rlim[RLIMIT_STACK].rlim_cur ||
((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) >
current->rlim[RLIMIT_AS].rlim_cur) {
- spin_unlock(&vma->vm_mm->page_table_lock);
vm_unacct_memory(grow);
return -ENOMEM;
}
@@ -965,7 +954,6 @@ int expand_stack(struct vm_area_struct *
vma->vm_mm->total_vm += grow;
if (vma->vm_flags & VM_LOCKED)
vma->vm_mm->locked_vm += grow;
- spin_unlock(&vma->vm_mm->page_table_lock);
return 0;
}
@@ -999,19 +987,16 @@ int expand_stack(struct vm_area_struct *
* the spinlock only before relocating the vma range ourself.
*/
address &= PAGE_MASK;
- spin_lock(&vma->vm_mm->page_table_lock);
grow = (vma->vm_start - address) >> PAGE_SHIFT;
/* Overcommit.. */
if (!vm_enough_memory(grow)) {
- spin_unlock(&vma->vm_mm->page_table_lock);
return -ENOMEM;
}
if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur ||
((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) >
current->rlim[RLIMIT_AS].rlim_cur) {
- spin_unlock(&vma->vm_mm->page_table_lock);
vm_unacct_memory(grow);
return -ENOMEM;
}
@@ -1020,7 +1005,6 @@ int expand_stack(struct vm_area_struct *
vma->vm_mm->total_vm += grow;
if (vma->vm_flags & VM_LOCKED)
vma->vm_mm->locked_vm += grow;
- spin_unlock(&vma->vm_mm->page_table_lock);
return 0;
}
@@ -1183,8 +1167,6 @@ static void unmap_region(struct mm_struc
/*
* Create a list of vma's touched by the unmap, removing them from the mm's
* vma list as we go..
- *
- * Called with the page_table_lock held.
*/
static void
detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -1308,8 +1290,8 @@ int do_munmap(struct mm_struct *mm, unsi
/*
* Remove the vma's, and unmap the actual pages
*/
- spin_lock(&mm->page_table_lock);
detach_vmas_to_be_unmapped(mm, mpnt, prev, end);
+ spin_lock(&mm->page_table_lock);
unmap_region(mm, mpnt, prev, start, end);
spin_unlock(&mm->page_table_lock);
--- 2.5.70-mm4/./mm/rmap.c 2003-06-04 15:47:24.000000000 -0500
+++ 2.5.70-mm4-nolock/./mm/rmap.c 2003-06-04 17:52:13.000000000 -0500
@@ -305,12 +305,21 @@ static int try_to_unmap_one(struct page
BUG();
/*
+ * Take mmap_sem across the find_vma call. We need to take it before
+ * the spinlock.
+ */
+ if (!down_read_trylock(&mm->mmap_sem)) {
+ ret = SWAP_AGAIN;
+ goto out_unmap;
+ }
+
+ /*
* We need the page_table_lock to protect us from page faults,
* munmap, fork, etc...
*/
if (!spin_trylock(&mm->page_table_lock)) {
- rmap_ptep_unmap(ptep);
- return SWAP_AGAIN;
+ ret = SWAP_AGAIN;
+ goto out_unlock_sem;
}
@@ -371,8 +380,11 @@ static int try_to_unmap_one(struct page
ret = SWAP_SUCCESS;
out_unlock:
- rmap_ptep_unmap(ptep);
spin_unlock(&mm->page_table_lock);
+out_unlock_sem:
+ up_read(&mm->mmap_sem);
+out_unmap:
+ rmap_ptep_unmap(ptep);
return ret;
}
* Re: [PATCH] Remove page_table_lock from vma manipulations
From: Hugh Dickins @ 2003-06-05 16:12 UTC (permalink / raw)
To: Dave McCracken; +Cc: Andrew Morton, Linux Memory Management, Linux Kernel
On Wed, 4 Jun 2003, Dave McCracken wrote:
>
> Gah. I don't know how I convinced myself that code was safe. It's easily
> fixed. How does this one look?
I think you have to keep page_table_lock in expand_stack (both the
GROWSUP and GROWSDOWN versions) because it is called with only
down_read on mmap_sem, so two threads could be racing: nothing else
protects the various vma field adjustments there. Otherwise it appears
correct to me. Beneficial?
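Concretely, a condensed sketch of the grows-down path with the lock
kept (stub types and a pthread mutex standing in for spinlock_t so the
sketch is self-contained; the field names come from the hunks above):

/* Condensed, illustrative sketch -- not verbatim kernel code. */
#include <pthread.h>

#define PAGE_SHIFT	12
#define PAGE_MASK	(~((1UL << PAGE_SHIFT) - 1))

struct mm_struct {
	unsigned long total_vm;
	pthread_mutex_t page_table_lock;	/* kernel: spinlock_t */
};

struct vm_area_struct {
	unsigned long vm_start, vm_pgoff;
	struct mm_struct *vm_mm;
};

int expand_stack_sketch(struct vm_area_struct *vma, unsigned long address)
{
	unsigned long grow;

	/* Callers hold mmap_sem for *read* only, so two faulting threads
	 * can be here at once for the same vma; this lock is the only
	 * thing serializing the check-then-update sequence. */
	address &= PAGE_MASK;
	pthread_mutex_lock(&vma->vm_mm->page_table_lock);
	grow = (vma->vm_start - address) >> PAGE_SHIFT;
	/* ... rlimit and overcommit checks here, unlocking on failure ... */
	vma->vm_start = address;
	vma->vm_pgoff -= grow;
	vma->vm_mm->total_vm += grow;
	pthread_mutex_unlock(&vma->vm_mm->page_table_lock);
	return 0;
}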
Hugh