From: Dave McCracken <dmccr@us.ibm.com>
To: Andrew Morton <akpm@digeo.com>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH 2.5.66-mm3] New page_convert_anon
Date: Thu, 03 Apr 2003 16:40:20 -0600	[thread overview]
Message-ID: <95000000.1049409620@baldur.austin.ibm.com> (raw)
In-Reply-To: <20030403142441.4a8a713e.akpm@digeo.com>

[-- Attachment #1: Type: text/plain, Size: 654 bytes --]


--On Thursday, April 03, 2003 14:24:41 -0800 Andrew Morton <akpm@digeo.com>
wrote:

> It's more conventional to lock the page in the caller.  And we forgot the
> whole reason for locking it: to keep truncate away.  We need to check that
> the page is still on the address_space after the page lock has been
> acquired.
> 
> This applies on top of your first.

Ok, that all makes sense.  Here's a patch with all your changes applied.
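
In other words, the caller-side idiom now looks like this (simplified
excerpt of the fremap.c hunk below; page_convert_anon() itself re-checks
page->mapping under the lock in case truncate got there first):

	lock_page(page);
	err = page_convert_anon(page);
	unlock_page(page);
	if (err < 0)
		goto err_free;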

Dave

======================================================================
Dave McCracken          IBM Linux Base Kernel Team      1-512-838-3059
dmccr@us.ibm.com                                        T/L   678-3059

[-- Attachment #2: objfix-2.5.66-mm3-3.diff --]
[-- Type: text/plain, Size: 7561 bytes --]

--- 2.5.66-mm3/./mm/fremap.c	2003-04-03 10:37:42.000000000 -0600
+++ 2.5.66-mm3-objfix/./mm/fremap.c	2003-04-03 16:35:27.000000000 -0600
@@ -73,7 +73,10 @@ int install_page(struct mm_struct *mm, s
 	pgidx += vma->vm_pgoff;
 	pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
 	if (!PageAnon(page) && (page->index != pgidx)) {
-		if (page_convert_anon(page) < 0)
+		lock_page(page);
+		err = page_convert_anon(page);
+		unlock_page(page);
+		if (err < 0)
 			goto err_free;
 	}
 
--- 2.5.66-mm3/./mm/rmap.c	2003-04-03 10:37:42.000000000 -0600
+++ 2.5.66-mm3-objfix/./mm/rmap.c	2003-04-03 16:36:17.000000000 -0600
@@ -460,23 +460,6 @@ void page_remove_rmap(struct page * page
 			}
 		}
 	}
-#ifdef DEBUG_RMAP
-	/* Not found. This should NEVER happen! */
-	printk(KERN_ERR "page_remove_rmap: pte_chain %p not present.\n", ptep);
-	printk(KERN_ERR "page_remove_rmap: only found: ");
-	if (PageDirect(page)) {
-		printk("%llx", (u64)page->pte.direct);
-	} else {
-		for (pc = page->pte.chain; pc; pc = pc->next) {
-			int i;
-			for (i = 0; i < NRPTE; i++)
-				printk(" %d:%llx", i, (u64)pc->ptes[i]);
-		}
-	}
-	printk("\n");
-	printk(KERN_ERR "page_remove_rmap: driver cleared PG_reserved ?\n");
-#endif
-
 out:
 	pte_chain_unlock(page);
 	if (!page_mapped(page))
@@ -781,152 +764,100 @@ out:
  * Find all the mappings for an object-based page and convert them
  * to 'anonymous', ie create a pte_chain and store all the pte pointers there.
  *
- * This function takes the address_space->i_shared_sem and the pte_chain_lock
- * for the page.  It jumps through some hoops to preallocate the correct number
- * of pte_chain structures to ensure that it can complete without releasing
- * the lock.
+ * This function takes the address_space->i_shared_sem, sets the PageAnon flag,
+ * then takes the mm->page_table_lock for each vma and calls page_add_rmap.
+ * This means there is a period when PageAnon is set but the page still has
+ * some mappings with no pte_chain entry.  This is in fact safe, since
+ * page_remove_rmap will simply not find it.  try_to_unmap might erroneously
+ * return success, but it will never be called because the page_convert_anon()
+ * caller has locked the page.
+ *
+ * page_referenced() may fail to scan all the appropriate pte's and may return
+ * an inaccurate result.  This is so rare that it does not matter.
  */
 int page_convert_anon(struct page *page)
 {
-	struct address_space *mapping = page->mapping;
+	struct address_space *mapping;
 	struct vm_area_struct *vma;
-	struct pte_chain *pte_chain = NULL, *ptec;
+	struct pte_chain *pte_chain = NULL;
 	pte_t *pte;
-	pte_addr_t pte_paddr = 0;
-	int mapcount;
-	int ptecount;
-	int index = 1;
 	int err = 0;
 
+	mapping = page->mapping;
+	if (mapping == NULL)
+		goto out;		/* truncate won the lock_page() race */
+
 	down(&mapping->i_shared_sem);
-	pte_chain_lock(page);
 
+	/* Take this only during setup */
+	pte_chain_lock(page);
 	/*
 	 * Has someone else done it for us before we got the lock?
 	 * If so, pte.direct or pte.chain has replaced pte.mapcount.
 	 */
-	if (PageAnon(page))
+	if (PageAnon(page)) {
+		pte_chain_unlock(page);
 		goto out_unlock;
+	}
 
-	/*
-	 * Preallocate the pte_chains outside the lock.
-	 * If mapcount grows while we're allocating here, retry.
-	 * If mapcount shrinks, we free the excess before returning.
-	 */
-	mapcount = page->pte.mapcount;
-	while (index < mapcount) {
+	SetPageAnon(page);
+	if (page->pte.mapcount == 0) {
 		pte_chain_unlock(page);
-		up(&mapping->i_shared_sem);
-		for (; index < mapcount; index += NRPTE) {
-			if (index == 1)
-				index = 0;
-			ptec = pte_chain_alloc(GFP_KERNEL);
-			if (!ptec) {
-				err = -ENOMEM;
-				goto out_free;
-			}
-			ptec->next = pte_chain;
-			pte_chain = ptec;
-		}
-		down(&mapping->i_shared_sem);
-		pte_chain_lock(page);
-		/*
-		 * Has someone else done it while we were allocating?
-		 */
-		if (PageAnon(page))
-			goto out_unlock;
-		mapcount = page->pte.mapcount;
+		goto out_unlock;
 	}
-	if (!mapcount)
-		goto set_anon;
+	/* This will be incremented again by page_add_rmap */
+	dec_page_state(nr_mapped);
+	page->pte.mapcount = 0;
 
-again:
 	/*
-	 * We don't try for page_table_lock (what would we do when a
-	 * trylock fails?), therefore there's a small chance that we
-	 * catch a vma just as it's being unmapped and its page tables
-	 * freed.  Our pte_chain_lock prevents that on vmas that really
-	 * contain our page, but not on the others we look at.  So we
-	 * might locate junk that looks just like our page's pfn.  It's
-	 * a transient and very unlikely occurrence (much less likely
-	 * than a trylock failing), so check how many maps we find,
-	 * and if too many, start all over again.
+	 * Now that the page is marked as anon, unlock it.
+	 * page_add_rmap will lock it as necessary.
 	 */
-	ptecount = 0;
-	ptec = pte_chain;
-
-	/*
-	 * Arrange for the first pte_chain to be partially filled at
-	 * the top, and the last (and any intermediates) to be full.
-	 */
-	index = mapcount % NRPTE;
-	if (index)
-		index = NRPTE - index;
+	pte_chain_unlock(page);
 
 	list_for_each_entry(vma, &mapping->i_mmap, shared) {
 		if (vma->vm_pgoff > (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT)))
 			break;
+		if (!pte_chain) {
+			pte_chain = pte_chain_alloc(GFP_KERNEL);
+			if (!pte_chain) {
+				err = -ENOMEM;
+				goto out_unlock;
+			}
+		}
+		spin_lock(&vma->vm_mm->page_table_lock);
 		pte = find_pte(vma, page, NULL);
 		if (pte) {
-			ptecount++;
-			if (unlikely(ptecount > mapcount))
-				goto again;
-			pte_paddr = ptep_to_paddr(pte);
-			pte_unmap(pte);
-			if (ptec) {
-				ptec->ptes[index] = pte_paddr;
-				index++;
-				if (index == NRPTE) {
-					ptec = ptec->next;
-					index = 0;
-				}
-			}
+			/* Make sure this isn't a duplicate */
+			page_remove_rmap(page, pte);
+			pte_chain = page_add_rmap(page, pte, pte_chain);
 		}
+		spin_unlock(&vma->vm_mm->page_table_lock);
 	}
 	list_for_each_entry(vma, &mapping->i_mmap_shared, shared) {
 		if (vma->vm_pgoff > (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT)))
 			break;
+		if (!pte_chain) {
+			pte_chain = pte_chain_alloc(GFP_KERNEL);
+			if (!pte_chain) {
+				err = -ENOMEM;
+				goto out_unlock;
+			}
+		}
+		spin_lock(&vma->vm_mm->page_table_lock);
 		pte = find_pte(vma, page, NULL);
 		if (pte) {
-			ptecount++;
-			if (unlikely(ptecount > mapcount))
-				goto again;
-			pte_paddr = ptep_to_paddr(pte);
-			pte_unmap(pte);
-			if (ptec) {
-				ptec->ptes[index] = pte_paddr;
-				index++;
-				if (index == NRPTE) {
-					ptec = ptec->next;
-					index = 0;
-				}
-			}
+			/* Make sure this isn't a duplicate */
+			page_remove_rmap(page, pte);
+			pte_chain = page_add_rmap(page, pte, pte_chain);
 		}
+		spin_unlock(&vma->vm_mm->page_table_lock);
 	}
 
-	BUG_ON(ptecount != mapcount);
-	if (mapcount == 1) {
-		SetPageDirect(page);
-		page->pte.direct = pte_paddr;
-		/* If pte_chain is set, it's all excess to be freed */
-	} else {
-		page->pte.chain = pte_chain;
-		/* Point pte_chain to any excess to be freed */
-		pte_chain = ptec;
-		BUG_ON(index);
-	}
-
-set_anon:
-	SetPageAnon(page);
 out_unlock:
-	pte_chain_unlock(page);
+	pte_chain_free(pte_chain);
 	up(&mapping->i_shared_sem);
-out_free:
-	while (pte_chain) {
-		ptec = pte_chain->next;
-		pte_chain_free(pte_chain);
-		pte_chain = ptec;
-	}
+out:
 	return err;
 }
 

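A rough timeline of the locking in the new page_convert_anon(), for anyone
skimming the patch (a pseudo-C sketch of the reasoning in the comment above,
not code from the tree):

	lock_page(page);                        /* in the caller */
	  down(&mapping->i_shared_sem);         /* in page_convert_anon() */
	  SetPageAnon(page);                    /* window opens: PageAnon set,
	                                           pte_chain not yet populated */
	  for each vma on i_mmap and i_mmap_shared:
	      spin_lock(&vma->vm_mm->page_table_lock);
	      page_remove_rmap(page, pte);      /* drop any stale entry */
	      pte_chain = page_add_rmap(page, pte, pte_chain);
	      spin_unlock(&vma->vm_mm->page_table_lock);
	  up(&mapping->i_shared_sem);
	unlock_page(page);                      /* window closes: try_to_unmap
	                                           runs only on locked pages, so
	                                           it never sees the half-built
	                                           state */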