* [PATCH 2.5.42-mm3] More shared page table fixes
@ 2002-10-15 15:40 Dave McCracken
  2002-10-16  6:07 ` Andrew Morton
  0 siblings, 1 reply; 2+ messages in thread
From: Dave McCracken @ 2002-10-15 15:40 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linux Kernel, Linux Memory Management

[-- Attachment #1: Type: text/plain, Size: 364 bytes --]


This patch makes unmap_all_pages work correctly on PAE-enabled
machines.  It also adds a missing spinlock (the mapping's i_shared_lock)
around the i_mmap_shared walk in pte_try_to_share.
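
The snippet below is a userspace sketch (not kernel code) of the
locking pattern this adds to pte_try_to_share: take the lock before
walking the shared list, and route every exit through a single unlock
rather than returning from inside the loop.  A pthread mutex stands in
for the kernel spinlock; the list, the key, and find_shared() are
purely illustrative.

#include <pthread.h>
#include <stdio.h>

/* Illustrative stand-ins: a pthread mutex for the spinlock, a toy list. */
struct node { struct node *next; int key; };

static pthread_mutex_t shared_lock = PTHREAD_MUTEX_INITIALIZER;

static struct node *find_shared(struct node *head, int key)
{
	struct node *found = NULL;	/* like "pte_t *pte = NULL" in the patch */
	struct node *n;

	pthread_mutex_lock(&shared_lock);
	for (n = head; n; n = n->next) {
		if (n->key == key) {
			found = n;
			break;		/* the pre-lock code returned here; with the
					 * lock held we must fall through to unlock */
		}
	}
	pthread_mutex_unlock(&shared_lock);	/* single exit: lock always dropped */
	return found;
}

int main(void)
{
	struct node b = { NULL, 2 }, a = { &b, 1 };

	printf("found key 2: %s\n", find_shared(&a, 2) ? "yes" : "no");
	return 0;
}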

Dave McCracken

======================================================================
Dave McCracken          IBM Linux Base Kernel Team      1-512-838-3059
dmccr@us.ibm.com                                        T/L   678-3059

[-- Attachment #2: shpte-2.5.42-mm3-1.diff --]
[-- Type: text/plain, Size: 5123 bytes --]

--- 2.5.42-mm3/./mm/memory.c	2002-10-15 09:59:37.000000000 -0500
+++ 2.5.42-mm3-shpte/./mm/memory.c	2002-10-15 10:18:15.000000000 -0500
@@ -372,6 +372,7 @@
 	struct vm_area_struct *lvma;
 	struct page *ptepage;
 	unsigned long base;
+	pte_t *pte = NULL;
 
 	/*
 	 * It already has a pte page.  No point in checking further.
@@ -394,6 +395,8 @@
 
 	as = vma->vm_file->f_dentry->d_inode->i_mapping;
 
+	spin_lock(&as->i_shared_lock);
+
 	list_for_each_entry(lvma, &as->i_mmap_shared, shared) {
 		pgd_t *lpgd;
 		pmd_t *lpmd;
@@ -431,9 +434,11 @@
 		else
 			pmdval = pmd_wrprotect(*lpmd);
 		set_pmd(pmd, pmdval);
-		return pte_page_map(ptepage, address);
+		pte = pte_page_map(ptepage, address);
+		break;
 	}
-	return NULL;
+	spin_unlock(&as->i_shared_lock);
+	return pte;
 }
 #endif
 
@@ -846,14 +851,16 @@
 	if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
 		end = ((address + PGDIR_SIZE) & PGDIR_MASK);
 	do {
-		ptepage = pmd_page(*pmd);
-		pte_page_lock(ptepage);
+		if (pmd_present(*pmd)) {
+			ptepage = pmd_page(*pmd);
+			pte_page_lock(ptepage);
 #ifdef CONFIG_SHAREPTE
-		if (page_count(ptepage) > 1)
-			BUG();
+			if (page_count(ptepage) > 1)
+				BUG();
 #endif
-		zap_pte_range(tlb, pmd, address, end - address);
-		pte_page_unlock(ptepage);
+			zap_pte_range(tlb, pmd, address, end - address);
+			pte_page_unlock(ptepage);
+		}
 		address = (address + PMD_SIZE) & PMD_MASK; 
 		pmd++;
 	} while (address < end);
@@ -938,72 +945,105 @@
 	mmu_gather_t *tlb;
 	pgd_t *pgd;
 	pmd_t *pmd;
-	unsigned long address;
-	unsigned long end;
+	unsigned long address = 0;
+	unsigned long vm_end = 0, prev_end, pmd_end;
 
 	tlb = tlb_gather_mmu(mm, 1);
 
 	vma = mm->mmap;
-	if (!vma)
-		goto out;
-
-	mm->map_count--;
-	if (is_vm_hugetlb_page(vma)) {
-		vma->vm_ops->close(vma);
-		goto next_vma;
-	}
-
-	address = vma->vm_start;
-	end = ((address + PGDIR_SIZE) & PGDIR_MASK);
+	for (;;) {
+		if (address >= vm_end) {
+			if (!vma)
+				goto out;
 
-	pgd = pgd_offset(mm, address);
-	pmd = pmd_offset(pgd, address);
-	do {
-		do {
-			if (pmd_none(*pmd))
-				goto skip_pmd;
-			if (pmd_bad(*pmd)) {
-				pmd_ERROR(*pmd);
-				pmd_clear(pmd);
-				goto skip_pmd;
-			}
-		
-			ptepage = pmd_page(*pmd);
-			pte_page_lock(ptepage);
-			if (page_count(ptepage) > 1) {
-				pmd_clear(pmd);
-				pgtable_remove_rmap_locked(ptepage, mm);
-				mm->rss -= ptepage->private;
-				put_page(ptepage);
-			} else {
-				zap_pte_range(tlb, pmd, address, end - address);
-			}
-			pte_page_unlock(ptepage);
-skip_pmd:
-			pmd++;
-			address = (address + PMD_SIZE) & PMD_MASK;
-			if (address >= vma->vm_end) {
+			address = vma->vm_start;
 next_vma:
-				vma = vma->vm_next;
-				if (!vma)
-					goto out;
-
-				mm->map_count--;
-				if (is_vm_hugetlb_page(vma)) {
+			prev_end = vm_end;
+			vm_end = vma->vm_end;
+			mm->map_count--;
+			/*
+			 * Advance the vma pointer to the next vma.
+			 * To facilitate coalescing adjacent vmas, the
+			 * pointer always points to the next one
+			 * beyond the range we're currently working
+			 * on, which means vma will be null on the
+			 * last iteration.
+			 */
+			vma = vma->vm_next;
+			if (vma) {
+				/*
+				 * Go ahead and include hugetlb vmas
+				 * in the range we process.  The pmd
+				 * entry will be cleared by close, so
+				 * we'll just skip over them.  This is
+				 * easier than trying to avoid them.
+				 */
+				if (is_vm_hugetlb_page(vma))
 					vma->vm_ops->close(vma);
+
+				/*
+				 * Coalesce adjacent vmas and process
+				 * them all in one iteration.
+				 */
+				if (vma->vm_start == prev_end) {
 					goto next_vma;
 				}
+			}
+		}
+		pgd = pgd_offset(mm, address);
+		do {
+			if (pgd_none(*pgd))
+				goto skip_pgd;
 
-				address = vma->vm_start;
-				end = ((address + PGDIR_SIZE) & PGDIR_MASK);
-				pgd = pgd_offset(mm, address);
-				pmd = pmd_offset(pgd, address);
+			if (pgd_bad(*pgd)) {
+				pgd_ERROR(*pgd);
+				pgd_clear(pgd);
+skip_pgd:
+				address += PGDIR_SIZE;
+				if (address > vm_end)
+					address = vm_end;
+				goto next_pgd;
 			}
-		} while (address < end);
-		pgd++;
-		pmd = pmd_offset(pgd, address);
-		end = ((address + PGDIR_SIZE) & PGDIR_MASK);
-	} while (vma);
+			pmd = pmd_offset(pgd, address);
+			if (vm_end > ((address + PGDIR_SIZE) & PGDIR_MASK))
+				pmd_end = (address + PGDIR_SIZE) & PGDIR_MASK;
+			else
+				pmd_end = vm_end;
+
+			for (;;) {
+				if (pmd_none(*pmd))
+					goto next_pmd;
+				if (pmd_bad(*pmd)) {
+					pmd_ERROR(*pmd);
+					pmd_clear(pmd);
+					goto next_pmd;
+				}
+				
+				ptepage = pmd_page(*pmd);
+				pte_page_lock(ptepage);
+				if (page_count(ptepage) > 1) {
+					pmd_clear(pmd);
+					pgtable_remove_rmap_locked(ptepage, mm);
+					mm->rss -= ptepage->private;
+					put_page(ptepage);
+				} else
+					zap_pte_range(tlb, pmd, address,
+						      vm_end - address);
+
+				pte_page_unlock(ptepage);
+next_pmd:
+				address += PMD_SIZE;
+				if (address >= pmd_end) {
+					address = pmd_end;
+					break;
+				}
+				pmd++;
+			}
+next_pgd:
+			pgd++;
+		} while (address < vm_end);
+
+	}
 
 out:
 	clear_page_tables(tlb, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);
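
For illustration, here is a simplified userspace model (again, not
kernel code) of the control flow the rewritten unmap_all_pages above
uses: coalesce adjacent vma ranges, then walk each merged range in
chunks that never cross a pgd boundary, which is what matters on PAE
where a single pgd entry covers only PGDIR_SIZE of the address space.
The 1 GB PGDIR_SIZE mirrors i386 PAE, but everything here, including
struct range and zap(), is illustrative and not a line-for-line
transcription of the patch.

#include <stdio.h>

#define PGDIR_SIZE	(1UL << 30)		/* 1 GB per pgd entry with PAE */
#define PGDIR_MASK	(~(PGDIR_SIZE - 1))

struct range { unsigned long start, end; };	/* stand-in for a vma */

static void zap(unsigned long start, unsigned long end)
{
	/* The real code walks pmds here and calls zap_pte_range(). */
	printf("  zap %#010lx..%#010lx (one pgd entry's worth)\n", start, end);
}

static void unmap_all(const struct range *r, int n)
{
	int i = 0;

	while (i < n) {
		unsigned long address = r[i].start;
		unsigned long vm_end = r[i].end;

		/* Coalesce ranges that start exactly where the last one ended. */
		while (++i < n && r[i].start == vm_end)
			vm_end = r[i].end;

		printf("range %#010lx..%#010lx\n", address, vm_end);
		while (address < vm_end) {
			/* Never let one step cross a pgd boundary. */
			unsigned long chunk = (address + PGDIR_SIZE) & PGDIR_MASK;
			if (chunk > vm_end)
				chunk = vm_end;
			zap(address, chunk);
			address = chunk;
		}
	}
}

int main(void)
{
	const struct range r[] = {
		{ 0x08048000, 0x08050000 },
		{ 0x08050000, 0x08060000 },	/* adjacent: merged with the previous */
		{ 0x3ff00000, 0x40100000 },	/* straddles the 1 GB pgd boundary */
	};

	unmap_all(r, 3);
	return 0;
}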
