From: Dave McCracken <dmccr@us.ibm.com>
To: Andrew Morton <akpm@digeo.com>
Cc: Linux Memory Management <linux-mm@kvack.org>,
	Linux Kernel <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH] Remove page_table_lock from vma manipulations
Date: Wed, 04 Jun 2003 17:58:22 -0500
Message-ID: <149060000.1054767502@baldur.austin.ibm.com>
In-Reply-To: <3EDE74D1.767C6071@digeo.com>

[-- Attachment #1: Type: text/plain, Size: 688 bytes --]


--On Wednesday, June 04, 2003 15:38:09 -0700 Andrew Morton <akpm@digeo.com>
wrote:

>> After more careful consideration, I don't see any reason why
>> page_table_lock is necessary for dealing with vmas.  I found one spot
>> in swapoff, but it was easily changed to use mmap_sem.
> 
> What keeps the VMA tree consistent when try_to_unmap_one()
> runs find_vma()?

Gah.  I don't know how I convinced myself that code was safe.  It's easily
fixed.  How does this one look?
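
In outline (simplified; the attached diff is the real thing), the new
locking in try_to_unmap_one() takes mmap_sem before page_table_lock,
using trylocks for both so we can back off with SWAP_AGAIN instead of
deadlocking against paths that already hold mmap_sem:

	if (!down_read_trylock(&mm->mmap_sem)) {
		ret = SWAP_AGAIN;
		goto out_unmap;
	}
	if (!spin_trylock(&mm->page_table_lock)) {
		ret = SWAP_AGAIN;
		goto out_unlock_sem;
	}
	/* ... find_vma() and the actual unmap are safe here ... */
	ret = SWAP_SUCCESS;
out_unlock:
	spin_unlock(&mm->page_table_lock);
out_unlock_sem:
	up_read(&mm->mmap_sem);
out_unmap:
	rmap_ptep_unmap(ptep);
	return ret;

do_munmap() gets the same treatment: the vma list surgery moves out
from under page_table_lock, which now only covers the actual page
table teardown.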

Dave

======================================================================
Dave McCracken          IBM Linux Base Kernel Team      1-512-838-3059
dmccr@us.ibm.com                                        T/L   678-3059

[-- Attachment #2: nolock-2.5.70-mm4-2.diff --]
[-- Type: text/plain, Size: 5656 bytes --]

--- 2.5.70-mm4/./mm/swapfile.c	2003-05-26 20:00:25.000000000 -0500
+++ 2.5.70-mm4-nolock/./mm/swapfile.c	2003-06-04 16:02:49.000000000 -0500
@@ -493,6 +493,7 @@ static int unuse_process(struct mm_struc
 	/*
 	 * Go through process' page directory.
 	 */
+	down_read(&mm->mmap_sem);
 	spin_lock(&mm->page_table_lock);
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		pgd_t * pgd = pgd_offset(mm, vma->vm_start);
@@ -500,6 +501,7 @@ static int unuse_process(struct mm_struc
 			break;
 	}
 	spin_unlock(&mm->page_table_lock);
+	up_read(&mm->mmap_sem);
 	pte_chain_free(pte_chain);
 	return 0;
 }
--- 2.5.70-mm4/./mm/mmap.c	2003-06-04 15:47:24.000000000 -0500
+++ 2.5.70-mm4-nolock/./mm/mmap.c	2003-06-04 16:02:49.000000000 -0500
@@ -346,9 +346,7 @@ static void vma_link(struct mm_struct *m
 
 	if (mapping)
 		down(&mapping->i_shared_sem);
-	spin_lock(&mm->page_table_lock);
 	__vma_link(mm, vma, prev, rb_link, rb_parent);
-	spin_unlock(&mm->page_table_lock);
 	if (mapping)
 		up(&mapping->i_shared_sem);
 
@@ -429,7 +427,6 @@ static int vma_merge(struct mm_struct *m
 			unsigned long end, unsigned long vm_flags,
 			struct file *file, unsigned long pgoff)
 {
-	spinlock_t * lock = &mm->page_table_lock;
 
 	/*
 	 * We later require that vma->vm_flags == vm_flags, so this tests
@@ -458,7 +455,6 @@ static int vma_merge(struct mm_struct *m
 			down(&inode->i_mapping->i_shared_sem);
 			need_up = 1;
 		}
-		spin_lock(lock);
 		prev->vm_end = end;
 
 		/*
@@ -471,7 +467,6 @@ static int vma_merge(struct mm_struct *m
 			prev->vm_end = next->vm_end;
 			__vma_unlink(mm, next, prev);
 			__remove_shared_vm_struct(next, inode);
-			spin_unlock(lock);
 			if (need_up)
 				up(&inode->i_mapping->i_shared_sem);
 			if (file)
@@ -481,7 +476,6 @@ static int vma_merge(struct mm_struct *m
 			kmem_cache_free(vm_area_cachep, next);
 			return 1;
 		}
-		spin_unlock(lock);
 		if (need_up)
 			up(&inode->i_mapping->i_shared_sem);
 		return 1;
@@ -497,10 +491,8 @@ static int vma_merge(struct mm_struct *m
 				pgoff, (end - addr) >> PAGE_SHIFT))
 			return 0;
 		if (end == prev->vm_start) {
-			spin_lock(lock);
 			prev->vm_start = addr;
 			prev->vm_pgoff -= (end - addr) >> PAGE_SHIFT;
-			spin_unlock(lock);
 			return 1;
 		}
 	}
@@ -945,19 +937,16 @@ int expand_stack(struct vm_area_struct *
 	 */
 	address += 4 + PAGE_SIZE - 1;
 	address &= PAGE_MASK;
- 	spin_lock(&vma->vm_mm->page_table_lock);
 	grow = (address - vma->vm_end) >> PAGE_SHIFT;
 
 	/* Overcommit.. */
 	if (!vm_enough_memory(grow)) {
-		spin_unlock(&vma->vm_mm->page_table_lock);
 		return -ENOMEM;
 	}

 	if (address - vma->vm_start > current->rlim[RLIMIT_STACK].rlim_cur ||
 			((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) >
 			current->rlim[RLIMIT_AS].rlim_cur) {
-		spin_unlock(&vma->vm_mm->page_table_lock);
 		vm_unacct_memory(grow);
 		return -ENOMEM;
 	}
@@ -965,7 +954,6 @@ int expand_stack(struct vm_area_struct *
 	vma->vm_mm->total_vm += grow;
 	if (vma->vm_flags & VM_LOCKED)
 		vma->vm_mm->locked_vm += grow;
-	spin_unlock(&vma->vm_mm->page_table_lock);
 	return 0;
 }
 
@@ -999,19 +987,16 @@ int expand_stack(struct vm_area_struct *
 	 * the spinlock only before relocating the vma range ourself.
 	 */
 	address &= PAGE_MASK;
- 	spin_lock(&vma->vm_mm->page_table_lock);
 	grow = (vma->vm_start - address) >> PAGE_SHIFT;
 
 	/* Overcommit.. */
 	if (!vm_enough_memory(grow)) {
-		spin_unlock(&vma->vm_mm->page_table_lock);
 		return -ENOMEM;
 	}

 	if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur ||
 			((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) >
 			current->rlim[RLIMIT_AS].rlim_cur) {
-		spin_unlock(&vma->vm_mm->page_table_lock);
 		vm_unacct_memory(grow);
 		return -ENOMEM;
 	}
@@ -1020,7 +1005,6 @@ int expand_stack(struct vm_area_struct *
 	vma->vm_mm->total_vm += grow;
 	if (vma->vm_flags & VM_LOCKED)
 		vma->vm_mm->locked_vm += grow;
-	spin_unlock(&vma->vm_mm->page_table_lock);
 	return 0;
 }
 
@@ -1183,8 +1167,6 @@ static void unmap_region(struct mm_struc
 /*
  * Create a list of vma's touched by the unmap, removing them from the mm's
  * vma list as we go..
- *
- * Called with the page_table_lock held.
  */
 static void
 detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -1308,8 +1290,8 @@ int do_munmap(struct mm_struct *mm, unsi
 	/*
 	 * Remove the vma's, and unmap the actual pages
 	 */
-	spin_lock(&mm->page_table_lock);
 	detach_vmas_to_be_unmapped(mm, mpnt, prev, end);
+	spin_lock(&mm->page_table_lock);
 	unmap_region(mm, mpnt, prev, start, end);
 	spin_unlock(&mm->page_table_lock);
 
--- 2.5.70-mm4/./mm/rmap.c	2003-06-04 15:47:24.000000000 -0500
+++ 2.5.70-mm4-nolock/./mm/rmap.c	2003-06-04 17:52:13.000000000 -0500
@@ -305,12 +305,21 @@ static int try_to_unmap_one(struct page 
 		BUG();
 
 	/*
+	 * Take mmap_sem across the find_vma call.  We need to take it before
+	 * the spinlock.
+	 */
+	if (!down_read_trylock(&mm->mmap_sem)) {
+		ret = SWAP_AGAIN;
+		goto out_unmap;
+	}
+
+	/*
 	 * We need the page_table_lock to protect us from page faults,
 	 * munmap, fork, etc...
 	 */
 	if (!spin_trylock(&mm->page_table_lock)) {
-		rmap_ptep_unmap(ptep);
-		return SWAP_AGAIN;
+		ret = SWAP_AGAIN;
+		goto out_unlock_sem;
 	}
 
 
@@ -371,8 +380,11 @@ static int try_to_unmap_one(struct page 
 	ret = SWAP_SUCCESS;
 
 out_unlock:
-	rmap_ptep_unmap(ptep);
 	spin_unlock(&mm->page_table_lock);
+out_unlock_sem:
+	up_read(&mm->mmap_sem);
+out_unmap:
+	rmap_ptep_unmap(ptep);
 	return ret;
 }
 

Thread overview: 5+ messages
2003-06-04 22:30 Dave McCracken
2003-06-04 22:37 ` William Lee Irwin III
2003-06-04 22:38 ` Andrew Morton
2003-06-04 22:58   ` Dave McCracken [this message]
2003-06-05 16:12     ` Hugh Dickins
