--- 2.5.41-mm3/./mm/mmap.c	2002-10-11 10:54:43.000000000 -0500
+++ 2.5.41-mm3-shpte/./mm/mmap.c	2002-10-11 11:34:24.000000000 -0500
@@ -24,7 +24,10 @@
 #include 
 
 extern void unmap_page_range(mmu_gather_t *,struct vm_area_struct *vma, unsigned long address, unsigned long size);
-extern void unmap_all_pages(mmu_gather_t *tlb, struct mm_struct *mm, unsigned long address, unsigned long end);
+#ifdef CONFIG_SHAREPTE
+extern void unmap_shared_range(struct mm_struct *mm, unsigned long address, unsigned long end);
+#endif
+extern void unmap_all_pages(struct mm_struct *mm);
 extern void clear_page_tables(mmu_gather_t *tlb, unsigned long first, int nr);
 
 /*
@@ -984,6 +987,10 @@
 {
 	mmu_gather_t *tlb;
 
+#ifdef CONFIG_SHAREPTE
+	/* Make sure all the pte pages in the range are unshared if necessary */
+	unmap_shared_range(mm, start, end);
+#endif
 	tlb = tlb_gather_mmu(mm, 0);
 
 	do {
@@ -1267,9 +1274,7 @@
 /* Release all mmaps. */
 void exit_mmap(struct mm_struct * mm)
 {
-	mmu_gather_t *tlb;
 	struct vm_area_struct * mpnt;
-	int unmap_vma = mm->total_vm < UNMAP_THRESHOLD;
 
 	profile_exit_mmap(mm);
 
@@ -1277,39 +1282,14 @@
 
 	spin_lock(&mm->page_table_lock);
 
-	tlb = tlb_gather_mmu(mm, 1);
-	flush_cache_mm(mm);
-	mpnt = mm->mmap;
-	while (mpnt) {
-		unsigned long start = mpnt->vm_start;
-		unsigned long end = mpnt->vm_end;
-		/*
-		 * If the VMA has been charged for, account for its
-		 * removal
-		 */
-		if (mpnt->vm_flags & VM_ACCOUNT)
-			vm_unacct_memory((end - start) >> PAGE_SHIFT);
-
-		mm->map_count--;
-		if (is_vm_hugetlb_page(mpnt))
-			mpnt->vm_ops->close(mpnt);
-		else if (unmap_vma)
-			unmap_page_range(tlb, mpnt, start, end);
-		mpnt = mpnt->vm_next;
-	}
+	unmap_all_pages(mm);
 
 	/* This is just debugging */
 	if (mm->map_count)
 		BUG();
 
-	if (!unmap_vma)
-		unmap_all_pages(tlb, mm, 0, TASK_SIZE);
-
-	clear_page_tables(tlb, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);
-	tlb_finish_mmu(tlb, 0, TASK_SIZE);
-
 	mpnt = mm->mmap;
 	mm->mmap = mm->mmap_cache = NULL;
 	mm->mm_rb = RB_ROOT;
@@ -1325,6 +1305,14 @@
 	 */
 	while (mpnt) {
 		struct vm_area_struct * next = mpnt->vm_next;
+
+		/*
+		 * If the VMA has been charged for, account for its
+		 * removal
+		 */
+		if (mpnt->vm_flags & VM_ACCOUNT)
+			vm_unacct_memory((mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT);
+
 		remove_shared_vm_struct(mpnt);
 		if (mpnt->vm_ops) {
 			if (mpnt->vm_ops->close)
--- 2.5.41-mm3/./mm/memory.c	2002-10-11 10:54:43.000000000 -0500
+++ 2.5.41-mm3-shpte/./mm/memory.c	2002-10-11 10:59:14.000000000 -0500
@@ -267,26 +267,34 @@
 	base = addr = oldpage->index;
 	page_end = base + PMD_SIZE;
 	vma = find_vma(mm, base);
-	if (!vma || (page_end <= vma->vm_start))
-		BUG();	/* No valid pages in this pte page */
 
 	src_unshare = page_count(oldpage) == 2;
 
 	dst_ptb = pte_page_map(newpage, base);
 	src_ptb = pte_page_map_nested(oldpage, base);
 
-	if (vma->vm_start > addr)
-		addr = vma->vm_start;
+	if (page_end <= vma->vm_start)
+		vma = NULL;
 
-	if (vma->vm_end < page_end)
-		end = vma->vm_end;
-	else
-		end = page_end;
+	if (vma) {
+		if (vma->vm_start > addr)
+			addr = vma->vm_start;
+
+		if (vma->vm_end < page_end)
+			end = vma->vm_end;
+		else
+			end = page_end;
+	} else {
+		addr = end = page_end;
+	}
 
 	do {
-		unsigned int cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
+		unsigned int cow = 0;
 		pte_t *src_pte = src_ptb + __pte_offset(addr);
 		pte_t *dst_pte = dst_ptb + __pte_offset(addr);
 
+		if (vma)
+			cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
+
 		do {
 			pte_t pte = *src_pte;
 			struct page *page;
@@ -637,9 +645,71 @@
 }
 #endif
 
-static void zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size)
+#ifdef CONFIG_SHAREPTE
+static inline void unmap_shared_pmd(struct mm_struct *mm, pgd_t *pgd,
+				    unsigned long address, unsigned long end)
 {
 	struct page *ptepage;
+	pmd_t * pmd;
+
+	if (pgd_none(*pgd))
+		return;
+	if (pgd_bad(*pgd)) {
+		pgd_ERROR(*pgd);
+		pgd_clear(pgd);
+		return;
+	}
+	pmd = pmd_offset(pgd, address);
+	if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
+		end = ((address + PGDIR_SIZE) & PGDIR_MASK);
+	do {
+		if (pmd_none(*pmd))
+			goto skip_pmd;
+		if (pmd_bad(*pmd)) {
+			pmd_ERROR(*pmd);
+			pmd_clear(pmd);
+			goto skip_pmd;
+		}
+
+		ptepage = pmd_page(*pmd);
+		pte_page_lock(ptepage);
+
+		if (page_count(ptepage) > 1) {
+			if ((address <= ptepage->index) &&
+			    (end >= (ptepage->index + PMD_SIZE))) {
+				pmd_clear(pmd);
+				pgtable_remove_rmap_locked(ptepage, mm);
+				mm->rss -= ptepage->private;
+				put_page(ptepage);
+			} else {
+				pte_unshare(mm, pmd, address);
+				ptepage = pmd_page(*pmd);
+			}
+		}
+		pte_page_unlock(ptepage);
+skip_pmd:
+		address = (address + PMD_SIZE) & PMD_MASK;
+		pmd++;
+	} while (address < end);
+}
+
+void unmap_shared_range(struct mm_struct *mm, unsigned long address, unsigned long end)
+{
+	pgd_t * pgd;
+
+	if (address >= end)
+		BUG();
+	pgd = pgd_offset(mm, address);
+	do {
+		unmap_shared_pmd(mm, pgd, address, end - address);
+		address = (address + PGDIR_SIZE) & PGDIR_MASK;
+		pgd++;
+	} while (address && (address < end));
+}
+#endif
+
+static void zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size)
+{
 	unsigned long offset;
 	pte_t *ptep;
 
@@ -656,29 +726,7 @@
 		size = PMD_SIZE - offset;
 	size &= PAGE_MASK;
 
-	/*
-	 * Check to see if the pte page is shared.  If it is and we're unmapping
-	 * the entire page, just decrement the reference count and we're done.
-	 * If we're only unmapping part of the page we'll have to unshare it the
-	 * slow way.
-	 */
-	ptepage = pmd_page(*pmd);
-	pte_page_lock(ptepage);
-#ifdef CONFIG_SHAREPTE
-	if (page_count(ptepage) > 1) {
-		if ((offset == 0) && (size == PMD_SIZE)) {
-			pmd_clear(pmd);
-			pgtable_remove_rmap_locked(ptepage, tlb->mm);
-			tlb->mm->rss -= ptepage->private;
-			put_page(ptepage);
-			pte_page_unlock(ptepage);
-			return;
-		}
-		ptep = pte_unshare(tlb->mm, pmd, address);
-		ptepage = pmd_page(*pmd);
-	} else
-#endif
-		ptep = pte_offset_map(pmd, address);
+	ptep = pte_offset_map(pmd, address);
 
 	for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) {
 		pte_t pte = *ptep;
@@ -707,12 +755,12 @@
 			pte_clear(ptep);
 		}
 	}
-	pte_page_unlock(ptepage);
 	pte_unmap(ptep-1);
 }
 
 static void zap_pmd_range(mmu_gather_t *tlb, pgd_t * dir, unsigned long address, unsigned long size)
 {
+	struct page *ptepage;
 	pmd_t * pmd;
 	unsigned long end;
 
@@ -728,7 +776,14 @@
 	if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
 		end = ((address + PGDIR_SIZE) & PGDIR_MASK);
 	do {
+		ptepage = pmd_page(*pmd);
+		pte_page_lock(ptepage);
+#ifdef CONFIG_SHAREPTE
+		if (page_count(ptepage) > 1)
+			BUG();
+#endif
 		zap_pte_range(tlb, pmd, address, end - address);
+		pte_page_unlock(ptepage);
 		address = (address + PMD_SIZE) & PMD_MASK;
 		pmd++;
 	} while (address < end);
@@ -779,6 +834,9 @@
 
 	spin_lock(&mm->page_table_lock);
 
+#ifdef CONFIG_SHAREPTE
+	unmap_shared_range(mm, address, address + size);
+#endif
 	/*
	 * This was once a long-held spinlock.  Now we break the
	 * work up into ZAP_BLOCK_SIZE units and relinquish the
@@ -803,19 +861,85 @@
 	spin_unlock(&mm->page_table_lock);
 }
 
-void unmap_all_pages(mmu_gather_t *tlb, struct mm_struct *mm, unsigned long address, unsigned long end)
+void unmap_all_pages(struct mm_struct *mm)
 {
-	pgd_t * dir;
+	struct vm_area_struct *vma;
+	struct page *ptepage;
+	mmu_gather_t *tlb;
+	pgd_t *pgd;
+	pmd_t *pmd;
+	unsigned long address;
+	unsigned long end;
 
-	if (address >= end)
-		BUG();
-	dir = pgd_offset(mm, address);
+	tlb = tlb_gather_mmu(mm, 1);
+
+	vma = mm->mmap;
+	if (!vma)
+		goto out;
+
+	mm->map_count--;
+	if (is_vm_hugetlb_page(vma)) {
+		vma->vm_ops->close(vma);
+		goto next_vma;
+	}
+
+	address = vma->vm_start;
+	end = ((address + PGDIR_SIZE) & PGDIR_MASK);
+
+	pgd = pgd_offset(mm, address);
+	pmd = pmd_offset(pgd, address);
 	do {
-		zap_pmd_range(tlb, dir, address, end - address);
-		address = (address + PGDIR_SIZE) & PGDIR_MASK;
-		dir++;
-	} while (address && (address < end));
+		do {
+			if (pmd_none(*pmd))
+				goto skip_pmd;
+			if (pmd_bad(*pmd)) {
+				pmd_ERROR(*pmd);
+				pmd_clear(pmd);
+				goto skip_pmd;
+			}
+
+			ptepage = pmd_page(*pmd);
+			pte_page_lock(ptepage);
+			if (page_count(ptepage) > 1) {
+				pmd_clear(pmd);
+				pgtable_remove_rmap_locked(ptepage, mm);
+				mm->rss -= ptepage->private;
+				put_page(ptepage);
+			} else {
+				zap_pte_range(tlb, pmd, address, end - address);
+			}
+			pte_page_unlock(ptepage);
+skip_pmd:
+			pmd++;
+			address = (address + PMD_SIZE) & PMD_MASK;
+			if (address >= vma->vm_end) {
+next_vma:
+				vma = vma->vm_next;
+				if (!vma)
+					goto out;
+
+				mm->map_count--;
+				if (is_vm_hugetlb_page(vma)) {
+					vma->vm_ops->close(vma);
+					goto next_vma;
+				}
+
+				address = vma->vm_start;
+				end = ((address + PGDIR_SIZE) & PGDIR_MASK);
+				pgd = pgd_offset(mm, address);
+				pmd = pmd_offset(pgd, address);
+			}
+		} while (address < end);
+		pgd++;
+		pmd = pmd_offset(pgd, address);
+		end = ((address + PGDIR_SIZE) & PGDIR_MASK);
+	} while (vma);
+
+out:
+	clear_page_tables(tlb, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);
+	tlb_finish_mmu(tlb, 0, TASK_SIZE);
 }
+
 /*
  * Do a quick page-table lookup for a single page.
  * mm->page_table_lock must be held.