linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 2.5.66-mm3] New page_convert_anon
@ 2003-04-03 21:28 Dave McCracken
  2003-04-03 21:55 ` Andrew Morton
  0 siblings, 1 reply; 5+ messages in thread
From: Dave McCracken @ 2003-04-03 21:28 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linux Memory Management, Linux Kernel

[-- Attachment #1: Type: text/plain, Size: 559 bytes --]


Ok, here's my cut at a simpler page_convert_anon.  It passes Hugh and
Ingo's test cases, plus passes stress testing.

I also removed the DEBUG code in page_remove_rmap for the case where it
doesn't find anything.  This case is now definitely legal, so we shouldn't
let people shoot themselves in the foot by enabling the check.

Dave McCracken

======================================================================
Dave McCracken          IBM Linux Base Kernel Team      1-512-838-3059
dmccr@us.ibm.com                                        T/L   678-3059

[-- Attachment #2: objfix-2.5.66-mm3-1.diff --]
[-- Type: text/plain, Size: 6746 bytes --]

--- 2.5.66-mm3/mm/rmap.c	2003-04-03 10:37:42.000000000 -0600
+++ 2.5.66-mm3-objfix/mm/rmap.c	2003-04-03 15:22:12.000000000 -0600
@@ -460,23 +460,6 @@ void page_remove_rmap(struct page * page
 			}
 		}
 	}
-#ifdef DEBUG_RMAP
-	/* Not found. This should NEVER happen! */
-	printk(KERN_ERR "page_remove_rmap: pte_chain %p not present.\n", ptep);
-	printk(KERN_ERR "page_remove_rmap: only found: ");
-	if (PageDirect(page)) {
-		printk("%llx", (u64)page->pte.direct);
-	} else {
-		for (pc = page->pte.chain; pc; pc = pc->next) {
-			int i;
-			for (i = 0; i < NRPTE; i++)
-				printk(" %d:%llx", i, (u64)pc->ptes[i]);
-		}
-	}
-	printk("\n");
-	printk(KERN_ERR "page_remove_rmap: driver cleared PG_reserved ?\n");
-#endif
-
 out:
 	pte_chain_unlock(page);
 	if (!page_mapped(page))
@@ -781,152 +764,91 @@ out:
  * Find all the mappings for an object-based page and convert them
  * to 'anonymous', ie create a pte_chain and store all the pte pointers there.
  *
- * This function takes the address_space->i_shared_sem and the pte_chain_lock
- * for the page.  It jumps through some hoops to preallocate the correct number
- * of pte_chain structures to ensure that it can complete without releasing
- * the lock.
+ * This function takes the address_space->i_shared_sem, sets the PageAnon flag, then
+ * sets the mm->page_table_lock for each vma and calls page_add_rmap.  This means
+ * there is a period when PageAnon is set, but still has some mappings with no
+ * pte_chain entry.  This is in fact safe, since page_remove_rmap will simply not
+ * find it.  try_to_unmap might erroneously return success, but kswapd will correctly
+ * see that there are still users of the page and send it around again.
  */
 int page_convert_anon(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 	struct vm_area_struct *vma;
-	struct pte_chain *pte_chain = NULL, *ptec;
+	struct pte_chain *pte_chain = NULL;
 	pte_t *pte;
-	pte_addr_t pte_paddr = 0;
-	int mapcount;
-	int ptecount;
-	int index = 1;
 	int err = 0;
 
 	down(&mapping->i_shared_sem);
 	pte_chain_lock(page);
-
+	SetPageLocked(page);
 	/*
 	 * Has someone else done it for us before we got the lock?
 	 * If so, pte.direct or pte.chain has replaced pte.mapcount.
 	 */
-	if (PageAnon(page))
+	if (PageAnon(page)) {
+		pte_chain_unlock(page);
 		goto out_unlock;
+	}
 
-	/*
-	 * Preallocate the pte_chains outside the lock.
-	 * If mapcount grows while we're allocating here, retry.
-	 * If mapcount shrinks, we free the excess before returning.
-	 */
-	mapcount = page->pte.mapcount;
-	while (index < mapcount) {
+	SetPageAnon(page);
+	if (page->pte.mapcount == 0) {
 		pte_chain_unlock(page);
-		up(&mapping->i_shared_sem);
-		for (; index < mapcount; index += NRPTE) {
-			if (index == 1)
-				index = 0;
-			ptec = pte_chain_alloc(GFP_KERNEL);
-			if (!ptec) {
-				err = -ENOMEM;
-				goto out_free;
-			}
-			ptec->next = pte_chain;
-			pte_chain = ptec;
-		}
-		down(&mapping->i_shared_sem);
-		pte_chain_lock(page);
-		/*
-		 * Has someone else done it while we were allocating?
-		 */
-		if (PageAnon(page))
-			goto out_unlock;
-		mapcount = page->pte.mapcount;
+		goto out_unlock;
 	}
-	if (!mapcount)
-		goto set_anon;
-
-again:
-	/*
-	 * We don't try for page_table_lock (what would we do when a
-	 * trylock fails?), therefore there's a small chance that we
-	 * catch a vma just as it's being unmapped and its page tables
-	 * freed.  Our pte_chain_lock prevents that on vmas that really
-	 * contain our page, but not on the others we look at.  So we
-	 * might locate junk that looks just like our page's pfn.  It's
-	 * a transient and very unlikely occurrence (much less likely
-	 * than a trylock failing), so check how many maps we find,
-	 * and if too many, start all over again.
-	 */
-	ptecount = 0;
-	ptec = pte_chain;
+	/* This is gonna get incremented by page_add_rmap */
+	dec_page_state(nr_mapped);
+	page->pte.mapcount = 0;
 
 	/*
-	 * Arrange for the first pte_chain to be partially filled at
-	 * the top, and the last (and any intermediates) to be full.
+	 * Now that the page is marked as anon, unlock it.  page_add_rmap will lock
+	 * it as necessary.
 	 */
-	index = mapcount % NRPTE;
-	if (index)
-		index = NRPTE - index;
+	pte_chain_unlock(page);
 
 	list_for_each_entry(vma, &mapping->i_mmap, shared) {
 		if (vma->vm_pgoff > (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT)))
 			break;
+		if (!pte_chain) {
+			pte_chain = pte_chain_alloc(GFP_KERNEL);
+			if (!pte_chain) {
+				err = 1;
+				goto out_unlock;
+			}
+		}
+		spin_lock(&vma->vm_mm->page_table_lock);
 		pte = find_pte(vma, page, NULL);
 		if (pte) {
-			ptecount++;
-			if (unlikely(ptecount > mapcount))
-				goto again;
-			pte_paddr = ptep_to_paddr(pte);
-			pte_unmap(pte);
-			if (ptec) {
-				ptec->ptes[index] = pte_paddr;
-				index++;
-				if (index == NRPTE) {
-					ptec = ptec->next;
-					index = 0;
-				}
-			}
+			/* Make sure this isn't a duplicate */
+			page_remove_rmap(page, pte);
+			pte_chain = page_add_rmap(page, pte, pte_chain);
 		}
+		spin_unlock(&vma->vm_mm->page_table_lock);
 	}
 	list_for_each_entry(vma, &mapping->i_mmap_shared, shared) {
 		if (vma->vm_pgoff > (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT)))
 			break;
+		if (!pte_chain) {
+			pte_chain = pte_chain_alloc(GFP_KERNEL);
+			if (!pte_chain) {
+				err = 1;
+				goto out_unlock;
+			}
+		}
+		spin_lock(&vma->vm_mm->page_table_lock);
 		pte = find_pte(vma, page, NULL);
 		if (pte) {
-			ptecount++;
-			if (unlikely(ptecount > mapcount))
-				goto again;
-			pte_paddr = ptep_to_paddr(pte);
-			pte_unmap(pte);
-			if (ptec) {
-				ptec->ptes[index] = pte_paddr;
-				index++;
-				if (index == NRPTE) {
-					ptec = ptec->next;
-					index = 0;
-				}
-			}
+			/* Make sure this isn't a duplicate */
+			page_remove_rmap(page, pte);
+			pte_chain = page_add_rmap(page, pte, pte_chain);
 		}
+		spin_unlock(&vma->vm_mm->page_table_lock);
 	}
 
-	BUG_ON(ptecount != mapcount);
-	if (mapcount == 1) {
-		SetPageDirect(page);
-		page->pte.direct = pte_paddr;
-		/* If pte_chain is set, it's all excess to be freed */
-	} else {
-		page->pte.chain = pte_chain;
-		/* Point pte_chain to any excess to be freed */
-		pte_chain = ptec;
-		BUG_ON(index);
-	}
-
-set_anon:
-	SetPageAnon(page);
 out_unlock:
-	pte_chain_unlock(page);
+	pte_chain_free(pte_chain);
+	ClearPageLocked(page);
 	up(&mapping->i_shared_sem);
-out_free:
-	while (pte_chain) {
-		ptec = pte_chain->next;
-		pte_chain_free(pte_chain);
-		pte_chain = ptec;
-	}
 	return err;
 }
 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2.5.66-mm3] New page_convert_anon
  2003-04-03 21:28 [PATCH 2.5.66-mm3] New page_convert_anon Dave McCracken
@ 2003-04-03 21:55 ` Andrew Morton
  2003-04-03 22:12   ` Dave McCracken
  0 siblings, 1 reply; 5+ messages in thread
From: Andrew Morton @ 2003-04-03 21:55 UTC (permalink / raw)
  To: Dave McCracken; +Cc: linux-mm, linux-kernel

Dave McCracken <dmccr@us.ibm.com> wrote:
>
> 
> Ok, here's my cut at a simpler page_convert_anon.  It passes Hugh and
> Ingo's test cases, plus passes stress testing.
> 

OK.  I'd still be more comfortable if that page was locked though.  I expect
page->mapping is safe because truncate takes down pagetable before pagecache,
but it seems way cleaner.

Is there any reason you didn't do that?

 25-akpm/mm/filemap.c |    3 +++
 25-akpm/mm/fremap.c  |    5 ++++-
 25-akpm/mm/rmap.c    |   20 ++++++++++++--------
 3 files changed, 19 insertions(+), 9 deletions(-)

diff -puN mm/filemap.c~page_convert_anon-lock_page mm/filemap.c
--- 25/mm/filemap.c~page_convert_anon-lock_page	Thu Apr  3 13:44:29 2003
+++ 25-akpm/mm/filemap.c	Thu Apr  3 13:45:39 2003
@@ -64,6 +64,9 @@
  *  ->mmap_sem
  *    ->i_shared_sem		(various places)
  *
+ *  ->lock_page
+ *    ->i_shared_sem		(page_convert_anon)
+ *
  *  ->inode_lock
  *    ->sb_lock			(fs/fs-writeback.c)
  *    ->mapping->page_lock	(__sync_single_inode)
diff -puN mm/rmap.c~page_convert_anon-lock_page mm/rmap.c
--- 25/mm/rmap.c~page_convert_anon-lock_page	Thu Apr  3 13:44:34 2003
+++ 25-akpm/mm/rmap.c	Thu Apr  3 13:50:45 2003
@@ -764,12 +764,16 @@ out:
  * Find all the mappings for an object-based page and convert them
  * to 'anonymous', ie create a pte_chain and store all the pte pointers there.
  *
- * This function takes the address_space->i_shared_sem, sets the PageAnon flag, then
- * sets the mm->page_table_lock for each vma and calls page_add_rmap.  This means
- * there is a period when PageAnon is set, but still has some mappings with no
- * pte_chain entry.  This is in fact safe, since page_remove_rmap will simply not
- * find it.  try_to_unmap might erroneously return success, but kswapd will correctly
- * see that there are still users of the page and send it around again.
+ * This function takes the address_space->i_shared_sem, sets the PageAnon flag,
+ * then sets the mm->page_table_lock for each vma and calls page_add_rmap. This
+ * means there is a period when PageAnon is set, but still has some mappings
+ * with no pte_chain entry.  This is in fact safe, since page_remove_rmap will
+ * simply not find it.  try_to_unmap might erroneously return success, but it
+ * will never be called because the page_convert_anon() caller has locked the
+ * page.
+ *
+ * page_referenced() may fail to scan all the appropriate pte's and may return
+ * an inaccurate result.  This is so rare that it does not matter.
  */
 int page_convert_anon(struct page *page)
 {
@@ -801,8 +805,8 @@ int page_convert_anon(struct page *page)
 	page->pte.mapcount = 0;
 
 	/*
-	 * Now that the page is marked as anon, unlock it.  page_add_rmap will lock
-	 * it as necessary.
+	 * Now that the page is marked as anon, unlock it.  page_add_rmap will
+	 * lock it as necessary.
 	 */
 	pte_chain_unlock(page);
 
diff -puN mm/fremap.c~page_convert_anon-lock_page mm/fremap.c
--- 25/mm/fremap.c~page_convert_anon-lock_page	Thu Apr  3 13:44:49 2003
+++ 25-akpm/mm/fremap.c	Thu Apr  3 13:51:30 2003
@@ -73,7 +73,10 @@ int install_page(struct mm_struct *mm, s
 	pgidx += vma->vm_pgoff;
 	pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
 	if (!PageAnon(page) && (page->index != pgidx)) {
-		if (page_convert_anon(page) < 0)
+		lock_page(page);
+		err = page_convert_anon(page);
+		unlock_page(page);
+		if (err < 0)
 			goto err_free;
 	}
 

_

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2.5.66-mm3] New page_convert_anon
  2003-04-03 21:55 ` Andrew Morton
@ 2003-04-03 22:12   ` Dave McCracken
  2003-04-03 22:24     ` Andrew Morton
  0 siblings, 1 reply; 5+ messages in thread
From: Dave McCracken @ 2003-04-03 22:12 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 678 bytes --]


--On Thursday, April 03, 2003 13:55:22 -0800 Andrew Morton <akpm@digeo.com>
wrote:

> OK.  I'd still be more comfortable if that page was locked though.  I
> expect page->mapping is safe because truncate takes down pagetable before
> pagecache, but it seems way cleaner.
> 
> Is there any reason you didn't do that?

Because my mind slipped a gear and I did it wrong.  I tried to put it
inside page_convert_anon and messed it up.  How does this patch look?

Dave

======================================================================
Dave McCracken          IBM Linux Base Kernel Team      1-512-838-3059
dmccr@us.ibm.com                                        T/L   678-3059

[-- Attachment #2: objfix-2.5.66-mm3-2.diff --]
[-- Type: text/plain, Size: 6728 bytes --]

--- 2.5.66-mm3/mm/rmap.c	2003-04-03 10:37:42.000000000 -0600
+++ 2.5.66-mm3-objfix/mm/rmap.c	2003-04-03 16:11:03.000000000 -0600
@@ -460,23 +460,6 @@ void page_remove_rmap(struct page * page
 			}
 		}
 	}
-#ifdef DEBUG_RMAP
-	/* Not found. This should NEVER happen! */
-	printk(KERN_ERR "page_remove_rmap: pte_chain %p not present.\n", ptep);
-	printk(KERN_ERR "page_remove_rmap: only found: ");
-	if (PageDirect(page)) {
-		printk("%llx", (u64)page->pte.direct);
-	} else {
-		for (pc = page->pte.chain; pc; pc = pc->next) {
-			int i;
-			for (i = 0; i < NRPTE; i++)
-				printk(" %d:%llx", i, (u64)pc->ptes[i]);
-		}
-	}
-	printk("\n");
-	printk(KERN_ERR "page_remove_rmap: driver cleared PG_reserved ?\n");
-#endif
-
 out:
 	pte_chain_unlock(page);
 	if (!page_mapped(page))
@@ -781,152 +764,94 @@ out:
  * Find all the mappings for an object-based page and convert them
  * to 'anonymous', ie create a pte_chain and store all the pte pointers there.
  *
- * This function takes the address_space->i_shared_sem and the pte_chain_lock
- * for the page.  It jumps through some hoops to preallocate the correct number
- * of pte_chain structures to ensure that it can complete without releasing
- * the lock.
+ * This function does a lock_page, takes the
+ * address_space->i_shared_sem, sets the PageAnon flag, then sets the
+ * mm->page_table_lock for each vma and calls page_add_rmap.  This
+ * means there is a period when PageAnon is set, but still has some
+ * mappings with no pte_chain entry.  This is in fact safe, since
+ * page_remove_rmap will simply not find it.  try_to_unmap is safe because
+ * of the lock_page.
  */
 int page_convert_anon(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 	struct vm_area_struct *vma;
-	struct pte_chain *pte_chain = NULL, *ptec;
+	struct pte_chain *pte_chain = NULL;
 	pte_t *pte;
-	pte_addr_t pte_paddr = 0;
-	int mapcount;
-	int ptecount;
-	int index = 1;
 	int err = 0;
 
+	lock_page(page);
 	down(&mapping->i_shared_sem);
-	pte_chain_lock(page);
 
+	/* Take this only during setup */
+	pte_chain_lock(page);
 	/*
 	 * Has someone else done it for us before we got the lock?
 	 * If so, pte.direct or pte.chain has replaced pte.mapcount.
 	 */
-	if (PageAnon(page))
+	if (PageAnon(page)) {
+		pte_chain_unlock(page);
 		goto out_unlock;
+	}
 
-	/*
-	 * Preallocate the pte_chains outside the lock.
-	 * If mapcount grows while we're allocating here, retry.
-	 * If mapcount shrinks, we free the excess before returning.
-	 */
-	mapcount = page->pte.mapcount;
-	while (index < mapcount) {
+	SetPageAnon(page);
+	if (page->pte.mapcount == 0) {
 		pte_chain_unlock(page);
-		up(&mapping->i_shared_sem);
-		for (; index < mapcount; index += NRPTE) {
-			if (index == 1)
-				index = 0;
-			ptec = pte_chain_alloc(GFP_KERNEL);
-			if (!ptec) {
-				err = -ENOMEM;
-				goto out_free;
-			}
-			ptec->next = pte_chain;
-			pte_chain = ptec;
-		}
-		down(&mapping->i_shared_sem);
-		pte_chain_lock(page);
-		/*
-		 * Has someone else done it while we were allocating?
-		 */
-		if (PageAnon(page))
-			goto out_unlock;
-		mapcount = page->pte.mapcount;
+		goto out_unlock;
 	}
-	if (!mapcount)
-		goto set_anon;
+	/* This is gonna get incremented by page_add_rmap */
+	dec_page_state(nr_mapped);
+	page->pte.mapcount = 0;
 
-again:
 	/*
-	 * We don't try for page_table_lock (what would we do when a
-	 * trylock fails?), therefore there's a small chance that we
-	 * catch a vma just as it's being unmapped and its page tables
-	 * freed.  Our pte_chain_lock prevents that on vmas that really
-	 * contain our page, but not on the others we look at.  So we
-	 * might locate junk that looks just like our page's pfn.  It's
-	 * a transient and very unlikely occurrence (much less likely
-	 * than a trylock failing), so check how many maps we find,
-	 * and if too many, start all over again.
+	 * Now that the page is marked as anon, unlock it.  page_add_rmap will lock
+	 * it as necessary.
 	 */
-	ptecount = 0;
-	ptec = pte_chain;
-
-	/*
-	 * Arrange for the first pte_chain to be partially filled at
-	 * the top, and the last (and any intermediates) to be full.
-	 */
-	index = mapcount % NRPTE;
-	if (index)
-		index = NRPTE - index;
+	pte_chain_unlock(page);
 
 	list_for_each_entry(vma, &mapping->i_mmap, shared) {
 		if (vma->vm_pgoff > (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT)))
 			break;
+		if (!pte_chain) {
+			pte_chain = pte_chain_alloc(GFP_KERNEL);
+			if (!pte_chain) {
+				err = 1;
+				goto out_unlock;
+			}
+		}
+		spin_lock(&vma->vm_mm->page_table_lock);
 		pte = find_pte(vma, page, NULL);
 		if (pte) {
-			ptecount++;
-			if (unlikely(ptecount > mapcount))
-				goto again;
-			pte_paddr = ptep_to_paddr(pte);
-			pte_unmap(pte);
-			if (ptec) {
-				ptec->ptes[index] = pte_paddr;
-				index++;
-				if (index == NRPTE) {
-					ptec = ptec->next;
-					index = 0;
-				}
-			}
+			/* Make sure this isn't a duplicate */
+			page_remove_rmap(page, pte);
+			pte_chain = page_add_rmap(page, pte, pte_chain);
 		}
+		spin_unlock(&vma->vm_mm->page_table_lock);
 	}
 	list_for_each_entry(vma, &mapping->i_mmap_shared, shared) {
 		if (vma->vm_pgoff > (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT)))
 			break;
+		if (!pte_chain) {
+			pte_chain = pte_chain_alloc(GFP_KERNEL);
+			if (!pte_chain) {
+				err = 1;
+				goto out_unlock;
+			}
+		}
+		spin_lock(&vma->vm_mm->page_table_lock);
 		pte = find_pte(vma, page, NULL);
 		if (pte) {
-			ptecount++;
-			if (unlikely(ptecount > mapcount))
-				goto again;
-			pte_paddr = ptep_to_paddr(pte);
-			pte_unmap(pte);
-			if (ptec) {
-				ptec->ptes[index] = pte_paddr;
-				index++;
-				if (index == NRPTE) {
-					ptec = ptec->next;
-					index = 0;
-				}
-			}
+			/* Make sure this isn't a duplicate */
+			page_remove_rmap(page, pte);
+			pte_chain = page_add_rmap(page, pte, pte_chain);
 		}
+		spin_unlock(&vma->vm_mm->page_table_lock);
 	}
 
-	BUG_ON(ptecount != mapcount);
-	if (mapcount == 1) {
-		SetPageDirect(page);
-		page->pte.direct = pte_paddr;
-		/* If pte_chain is set, it's all excess to be freed */
-	} else {
-		page->pte.chain = pte_chain;
-		/* Point pte_chain to any excess to be freed */
-		pte_chain = ptec;
-		BUG_ON(index);
-	}
-
-set_anon:
-	SetPageAnon(page);
 out_unlock:
-	pte_chain_unlock(page);
+	pte_chain_free(pte_chain);
 	up(&mapping->i_shared_sem);
-out_free:
-	while (pte_chain) {
-		ptec = pte_chain->next;
-		pte_chain_free(pte_chain);
-		pte_chain = ptec;
-	}
+	unlock_page(page);
 	return err;
 }
 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2.5.66-mm3] New page_convert_anon
  2003-04-03 22:12   ` Dave McCracken
@ 2003-04-03 22:24     ` Andrew Morton
  2003-04-03 22:40       ` Dave McCracken
  0 siblings, 1 reply; 5+ messages in thread
From: Andrew Morton @ 2003-04-03 22:24 UTC (permalink / raw)
  To: Dave McCracken; +Cc: linux-mm, linux-kernel

Dave McCracken <dmccr@us.ibm.com> wrote:
>
> How does this patch look?

It's more conventional to lock the page in the caller.  And we forgot the
whole reason for locking it: to keep truncate away.  We need to check that
the page is still on the address_space after the page lock has been acquired.

This applies on top of your first.

 25-akpm/mm/filemap.c |    3 +++
 25-akpm/mm/fremap.c  |    5 ++++-
 25-akpm/mm/rmap.c    |   27 ++++++++++++++++++---------
 3 files changed, 25 insertions(+), 10 deletions(-)

diff -puN mm/filemap.c~page_convert_anon-lock_page mm/filemap.c
--- 25/mm/filemap.c~page_convert_anon-lock_page	Thu Apr  3 14:20:40 2003
+++ 25-akpm/mm/filemap.c	Thu Apr  3 14:20:40 2003
@@ -64,6 +64,9 @@
  *  ->mmap_sem
  *    ->i_shared_sem		(various places)
  *
+ *  ->lock_page
+ *    ->i_shared_sem		(page_convert_anon)
+ *
  *  ->inode_lock
  *    ->sb_lock			(fs/fs-writeback.c)
  *    ->mapping->page_lock	(__sync_single_inode)
diff -puN mm/rmap.c~page_convert_anon-lock_page mm/rmap.c
--- 25/mm/rmap.c~page_convert_anon-lock_page	Thu Apr  3 14:20:40 2003
+++ 25-akpm/mm/rmap.c	Thu Apr  3 14:22:17 2003
@@ -764,21 +764,29 @@ out:
  * Find all the mappings for an object-based page and convert them
  * to 'anonymous', ie create a pte_chain and store all the pte pointers there.
  *
- * This function takes the address_space->i_shared_sem, sets the PageAnon flag, then
- * sets the mm->page_table_lock for each vma and calls page_add_rmap.  This means
- * there is a period when PageAnon is set, but still has some mappings with no
- * pte_chain entry.  This is in fact safe, since page_remove_rmap will simply not
- * find it.  try_to_unmap might erroneously return success, but kswapd will correctly
- * see that there are still users of the page and send it around again.
+ * This function takes the address_space->i_shared_sem, sets the PageAnon flag,
+ * then sets the mm->page_table_lock for each vma and calls page_add_rmap. This
+ * means there is a period when PageAnon is set, but still has some mappings
+ * with no pte_chain entry.  This is in fact safe, since page_remove_rmap will
+ * simply not find it.  try_to_unmap might erroneously return success, but it
+ * will never be called because the page_convert_anon() caller has locked the
+ * page.
+ *
+ * page_referenced() may fail to scan all the appropriate pte's and may return
+ * an inaccurate result.  This is so rare that it does not matter.
  */
 int page_convert_anon(struct page *page)
 {
-	struct address_space *mapping = page->mapping;
+	struct address_space *mapping;
 	struct vm_area_struct *vma;
 	struct pte_chain *pte_chain = NULL;
 	pte_t *pte;
 	int err = 0;
 
+	mapping = page->mapping;
+	if (mapping == NULL)
+		goto out;		/* truncate won the lock_page() race */
+
 	down(&mapping->i_shared_sem);
 	pte_chain_lock(page);
 	SetPageLocked(page);
@@ -801,8 +809,8 @@ int page_convert_anon(struct page *page)
 	page->pte.mapcount = 0;
 
 	/*
-	 * Now that the page is marked as anon, unlock it.  page_add_rmap will lock
-	 * it as necessary.
+	 * Now that the page is marked as anon, unlock it.  page_add_rmap will
+	 * lock it as necessary.
 	 */
 	pte_chain_unlock(page);
 
@@ -849,6 +857,7 @@ out_unlock:
 	pte_chain_free(pte_chain);
 	ClearPageLocked(page);
 	up(&mapping->i_shared_sem);
+out:
 	return err;
 }
 
diff -puN mm/fremap.c~page_convert_anon-lock_page mm/fremap.c
--- 25/mm/fremap.c~page_convert_anon-lock_page	Thu Apr  3 14:20:40 2003
+++ 25-akpm/mm/fremap.c	Thu Apr  3 14:20:40 2003
@@ -73,7 +73,10 @@ int install_page(struct mm_struct *mm, s
 	pgidx += vma->vm_pgoff;
 	pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
 	if (!PageAnon(page) && (page->index != pgidx)) {
-		if (page_convert_anon(page) < 0)
+		lock_page(page);
+		err = page_convert_anon(page);
+		unlock_page(page);
+		if (err < 0)
 			goto err_free;
 	}
 

_

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2.5.66-mm3] New page_convert_anon
  2003-04-03 22:24     ` Andrew Morton
@ 2003-04-03 22:40       ` Dave McCracken
  0 siblings, 0 replies; 5+ messages in thread
From: Dave McCracken @ 2003-04-03 22:40 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 654 bytes --]


--On Thursday, April 03, 2003 14:24:41 -0800 Andrew Morton <akpm@digeo.com>
wrote:

> It's more conventional to lock the page in the caller.  And we forgot the
> whole reason for locking it: to keep truncate away.  We need to check that
> the page is still on the address_space after the page lock has been
> acquired.
> 
> This applies on top of your first.

Ok, that all makes sense.  Here's a patch with all your changes applied.

Dave

======================================================================
Dave McCracken          IBM Linux Base Kernel Team      1-512-838-3059
dmccr@us.ibm.com                                        T/L   678-3059

[-- Attachment #2: objfix-2.5.66-mm3-3.diff --]
[-- Type: text/plain, Size: 7561 bytes --]

--- 2.5.66-mm3/./mm/fremap.c	2003-04-03 10:37:42.000000000 -0600
+++ 2.5.66-mm3-objfix/./mm/fremap.c	2003-04-03 16:35:27.000000000 -0600
@@ -73,7 +73,10 @@ int install_page(struct mm_struct *mm, s
 	pgidx += vma->vm_pgoff;
 	pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
 	if (!PageAnon(page) && (page->index != pgidx)) {
-		if (page_convert_anon(page) < 0)
+		lock_page(page);
+		err = page_convert_anon(page);
+		unlock_page(page);
+		if (err < 0)
 			goto err_free;
 	}
 
--- 2.5.66-mm3/./mm/rmap.c	2003-04-03 10:37:42.000000000 -0600
+++ 2.5.66-mm3-objfix/./mm/rmap.c	2003-04-03 16:36:17.000000000 -0600
@@ -460,23 +460,6 @@ void page_remove_rmap(struct page * page
 			}
 		}
 	}
-#ifdef DEBUG_RMAP
-	/* Not found. This should NEVER happen! */
-	printk(KERN_ERR "page_remove_rmap: pte_chain %p not present.\n", ptep);
-	printk(KERN_ERR "page_remove_rmap: only found: ");
-	if (PageDirect(page)) {
-		printk("%llx", (u64)page->pte.direct);
-	} else {
-		for (pc = page->pte.chain; pc; pc = pc->next) {
-			int i;
-			for (i = 0; i < NRPTE; i++)
-				printk(" %d:%llx", i, (u64)pc->ptes[i]);
-		}
-	}
-	printk("\n");
-	printk(KERN_ERR "page_remove_rmap: driver cleared PG_reserved ?\n");
-#endif
-
 out:
 	pte_chain_unlock(page);
 	if (!page_mapped(page))
@@ -781,152 +764,100 @@ out:
  * Find all the mappings for an object-based page and convert them
  * to 'anonymous', ie create a pte_chain and store all the pte pointers there.
  *
- * This function takes the address_space->i_shared_sem and the pte_chain_lock
- * for the page.  It jumps through some hoops to preallocate the correct number
- * of pte_chain structures to ensure that it can complete without releasing
- * the lock.
+ * This function takes the address_space->i_shared_sem, sets the PageAnon flag,
+ * then sets the mm->page_table_lock for each vma and calls page_add_rmap. This
+ * means there is a period when PageAnon is set, but still has some mappings
+ * with no pte_chain entry.  This is in fact safe, since page_remove_rmap will
+ * simply not find it.  try_to_unmap might erroneously return success, but it
+ * will never be called because the page_convert_anon() caller has locked the
+ * page.
+ *
+ * page_referenced() may fail to scan all the appropriate pte's and may return
+ * an inaccurate result.  This is so rare that it does not matter.
  */
 int page_convert_anon(struct page *page)
 {
-	struct address_space *mapping = page->mapping;
+	struct address_space *mapping;
 	struct vm_area_struct *vma;
-	struct pte_chain *pte_chain = NULL, *ptec;
+	struct pte_chain *pte_chain = NULL;
 	pte_t *pte;
-	pte_addr_t pte_paddr = 0;
-	int mapcount;
-	int ptecount;
-	int index = 1;
 	int err = 0;
 
+	mapping = page->mapping;
+	if (mapping == NULL)
+		goto out;		/* truncate won the lock_page() race */
+
 	down(&mapping->i_shared_sem);
-	pte_chain_lock(page);
 
+	/* Take this only during setup */
+	pte_chain_lock(page);
 	/*
 	 * Has someone else done it for us before we got the lock?
 	 * If so, pte.direct or pte.chain has replaced pte.mapcount.
 	 */
-	if (PageAnon(page))
+	if (PageAnon(page)) {
+		pte_chain_unlock(page);
 		goto out_unlock;
+	}
 
-	/*
-	 * Preallocate the pte_chains outside the lock.
-	 * If mapcount grows while we're allocating here, retry.
-	 * If mapcount shrinks, we free the excess before returning.
-	 */
-	mapcount = page->pte.mapcount;
-	while (index < mapcount) {
+	SetPageAnon(page);
+	if (page->pte.mapcount == 0) {
 		pte_chain_unlock(page);
-		up(&mapping->i_shared_sem);
-		for (; index < mapcount; index += NRPTE) {
-			if (index == 1)
-				index = 0;
-			ptec = pte_chain_alloc(GFP_KERNEL);
-			if (!ptec) {
-				err = -ENOMEM;
-				goto out_free;
-			}
-			ptec->next = pte_chain;
-			pte_chain = ptec;
-		}
-		down(&mapping->i_shared_sem);
-		pte_chain_lock(page);
-		/*
-		 * Has someone else done it while we were allocating?
-		 */
-		if (PageAnon(page))
-			goto out_unlock;
-		mapcount = page->pte.mapcount;
+		goto out_unlock;
 	}
-	if (!mapcount)
-		goto set_anon;
+	/* This is gonna get incremented by page_add_rmap */
+	dec_page_state(nr_mapped);
+	page->pte.mapcount = 0;
 
-again:
 	/*
-	 * We don't try for page_table_lock (what would we do when a
-	 * trylock fails?), therefore there's a small chance that we
-	 * catch a vma just as it's being unmapped and its page tables
-	 * freed.  Our pte_chain_lock prevents that on vmas that really
-	 * contain our page, but not on the others we look at.  So we
-	 * might locate junk that looks just like our page's pfn.  It's
-	 * a transient and very unlikely occurrence (much less likely
-	 * than a trylock failing), so check how many maps we find,
-	 * and if too many, start all over again.
+	 * Now that the page is marked as anon, unlock it.
+	 * page_add_rmap will lock it as necessary.
 	 */
-	ptecount = 0;
-	ptec = pte_chain;
-
-	/*
-	 * Arrange for the first pte_chain to be partially filled at
-	 * the top, and the last (and any intermediates) to be full.
-	 */
-	index = mapcount % NRPTE;
-	if (index)
-		index = NRPTE - index;
+	pte_chain_unlock(page);
 
 	list_for_each_entry(vma, &mapping->i_mmap, shared) {
 		if (vma->vm_pgoff > (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT)))
 			break;
+		if (!pte_chain) {
+			pte_chain = pte_chain_alloc(GFP_KERNEL);
+			if (!pte_chain) {
+				err = -ENOMEM;
+				goto out_unlock;
+			}
+		}
+		spin_lock(&vma->vm_mm->page_table_lock);
 		pte = find_pte(vma, page, NULL);
 		if (pte) {
-			ptecount++;
-			if (unlikely(ptecount > mapcount))
-				goto again;
-			pte_paddr = ptep_to_paddr(pte);
-			pte_unmap(pte);
-			if (ptec) {
-				ptec->ptes[index] = pte_paddr;
-				index++;
-				if (index == NRPTE) {
-					ptec = ptec->next;
-					index = 0;
-				}
-			}
+			/* Make sure this isn't a duplicate */
+			page_remove_rmap(page, pte);
+			pte_chain = page_add_rmap(page, pte, pte_chain);
 		}
+		spin_unlock(&vma->vm_mm->page_table_lock);
 	}
 	list_for_each_entry(vma, &mapping->i_mmap_shared, shared) {
 		if (vma->vm_pgoff > (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT)))
 			break;
+		if (!pte_chain) {
+			pte_chain = pte_chain_alloc(GFP_KERNEL);
+			if (!pte_chain) {
+				err = -ENOMEM;
+				goto out_unlock;
+			}
+		}
+		spin_lock(&vma->vm_mm->page_table_lock);
 		pte = find_pte(vma, page, NULL);
 		if (pte) {
-			ptecount++;
-			if (unlikely(ptecount > mapcount))
-				goto again;
-			pte_paddr = ptep_to_paddr(pte);
-			pte_unmap(pte);
-			if (ptec) {
-				ptec->ptes[index] = pte_paddr;
-				index++;
-				if (index == NRPTE) {
-					ptec = ptec->next;
-					index = 0;
-				}
-			}
+			/* Make sure this isn't a duplicate */
+			page_remove_rmap(page, pte);
+			pte_chain = page_add_rmap(page, pte, pte_chain);
 		}
+		spin_unlock(&vma->vm_mm->page_table_lock);
 	}
 
-	BUG_ON(ptecount != mapcount);
-	if (mapcount == 1) {
-		SetPageDirect(page);
-		page->pte.direct = pte_paddr;
-		/* If pte_chain is set, it's all excess to be freed */
-	} else {
-		page->pte.chain = pte_chain;
-		/* Point pte_chain to any excess to be freed */
-		pte_chain = ptec;
-		BUG_ON(index);
-	}
-
-set_anon:
-	SetPageAnon(page);
 out_unlock:
-	pte_chain_unlock(page);
+	pte_chain_free(pte_chain);
 	up(&mapping->i_shared_sem);
-out_free:
-	while (pte_chain) {
-		ptec = pte_chain->next;
-		pte_chain_free(pte_chain);
-		pte_chain = ptec;
-	}
+out:
 	return err;
 }
 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2003-04-03 22:40 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-04-03 21:28 [PATCH 2.5.66-mm3] New page_convert_anon Dave McCracken
2003-04-03 21:55 ` Andrew Morton
2003-04-03 22:12   ` Dave McCracken
2003-04-03 22:24     ` Andrew Morton
2003-04-03 22:40       ` Dave McCracken

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox