From: Dave McCracken <dmccr@us.ibm.com>
To: Andrew Morton <akpm@digeo.com>
Cc: Linux Memory Management <linux-mm@kvack.org>,
Linux Kernel <linux-kernel@vger.kernel.org>
Subject: [PATCH 2.5.41-mm3] Fix unmap for shared page tables
Date: Fri, 11 Oct 2002 12:10:38 -0500
Message-ID: <65780000.1034356238@baldur.austin.ibm.com>
[-- Attachment #1: Type: text/plain, Size: 403 bytes --]
I realized I got the unmap code wrong for shared page tables. Here's a
patch that fixes the problem and also optimizes the exit case. It should
fix Paul Larson's BUG() as well.
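For reference, the rule the fix enforces is: a pte page may only be torn
down entry-by-entry once it is private to this mm. If page_count() says
it is still shared and the unmap covers its whole PMD_SIZE span, dropping
our reference is enough; a partial unmap has to unshare (copy) the page
first. Here is a minimal user-space sketch of that decision, with toy
stand-ins for the kernel names (nothing below is kernel API):

/* Toy model of the shared pte page decision made in unmap_shared_pmd().
 * All names here are illustrative stand-ins, not kernel API. */
#include <stdio.h>

#define TOY_PMD_SIZE 0x400000UL	/* span covered by one pte page */

struct toy_ptepage {
	int count;		/* models page_count(ptepage) */
	unsigned long base;	/* models ptepage->index */
};

/* Shared and fully covered by [start, end): just drop the reference. */
static int drop_whole_shared(const struct toy_ptepage *p,
			     unsigned long start, unsigned long end)
{
	return p->count > 1 && start <= p->base &&
	       end >= p->base + TOY_PMD_SIZE;
}

int main(void)
{
	struct toy_ptepage p = { .count = 2, .base = 0x800000UL };

	if (drop_whole_shared(&p, 0x800000UL, 0xC00000UL))
		printf("fully covered: clear the pmd and put_page()\n");
	else
		printf("partial or private: pte_unshare(), then zap\n");
	return 0;
}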
Dave McCracken
======================================================================
Dave McCracken          IBM Linux Base Kernel Team      1-512-838-3059
dmccr@us.ibm.com                                        T/L   678-3059
[-- Attachment #2: shpte-2.5.41-mm3-1.diff --]
[-- Type: text/plain, Size: 9805 bytes --]
--- 2.5.41-mm3/./mm/mmap.c 2002-10-11 10:54:43.000000000 -0500
+++ 2.5.41-mm3-shpte/./mm/mmap.c 2002-10-11 11:34:24.000000000 -0500
@@ -24,7 +24,10 @@
#include <asm/tlb.h>
extern void unmap_page_range(mmu_gather_t *,struct vm_area_struct *vma, unsigned long address, unsigned long size);
-extern void unmap_all_pages(mmu_gather_t *tlb, struct mm_struct *mm, unsigned long address, unsigned long end);
+#ifdef CONFIG_SHAREPTE
+extern void unmap_shared_range(struct mm_struct *mm, unsigned long address, unsigned long end);
+#endif
+extern void unmap_all_pages(struct mm_struct *mm);
extern void clear_page_tables(mmu_gather_t *tlb, unsigned long first, int nr);
/*
@@ -984,6 +987,10 @@
{
mmu_gather_t *tlb;
+#ifdef CONFIG_SHAREPTE
+ /* Make sure all the pte pages in the range are unshared if necessary */
+ unmap_shared_range(mm, start, end);
+#endif
tlb = tlb_gather_mmu(mm, 0);
do {
@@ -1267,9 +1274,7 @@
/* Release all mmaps. */
void exit_mmap(struct mm_struct * mm)
{
- mmu_gather_t *tlb;
struct vm_area_struct * mpnt;
- int unmap_vma = mm->total_vm < UNMAP_THRESHOLD;
profile_exit_mmap(mm);
@@ -1277,39 +1282,14 @@
spin_lock(&mm->page_table_lock);
- tlb = tlb_gather_mmu(mm, 1);
-
flush_cache_mm(mm);
- mpnt = mm->mmap;
- while (mpnt) {
- unsigned long start = mpnt->vm_start;
- unsigned long end = mpnt->vm_end;
- /*
- * If the VMA has been charged for, account for its
- * removal
- */
- if (mpnt->vm_flags & VM_ACCOUNT)
- vm_unacct_memory((end - start) >> PAGE_SHIFT);
-
- mm->map_count--;
- if (is_vm_hugetlb_page(mpnt))
- mpnt->vm_ops->close(mpnt);
- else if (unmap_vma)
- unmap_page_range(tlb, mpnt, start, end);
- mpnt = mpnt->vm_next;
- }
+ unmap_all_pages(mm);
/* This is just debugging */
if (mm->map_count)
BUG();
- if (!unmap_vma)
- unmap_all_pages(tlb, mm, 0, TASK_SIZE);
-
- clear_page_tables(tlb, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);
- tlb_finish_mmu(tlb, 0, TASK_SIZE);
-
mpnt = mm->mmap;
mm->mmap = mm->mmap_cache = NULL;
mm->mm_rb = RB_ROOT;
@@ -1325,6 +1305,14 @@
*/
while (mpnt) {
struct vm_area_struct * next = mpnt->vm_next;
+
+ /*
+ * If the VMA has been charged for, account for its
+ * removal
+ */
+ if (mpnt->vm_flags & VM_ACCOUNT)
+ vm_unacct_memory((mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT);
+
remove_shared_vm_struct(mpnt);
if (mpnt->vm_ops) {
if (mpnt->vm_ops->close)
--- 2.5.41-mm3/./mm/memory.c 2002-10-11 10:54:43.000000000 -0500
+++ 2.5.41-mm3-shpte/./mm/memory.c 2002-10-11 10:59:14.000000000 -0500
@@ -267,26 +267,34 @@
base = addr = oldpage->index;
page_end = base + PMD_SIZE;
vma = find_vma(mm, base);
- if (!vma || (page_end <= vma->vm_start))
- BUG(); /* No valid pages in this pte page */
src_unshare = page_count(oldpage) == 2;
dst_ptb = pte_page_map(newpage, base);
src_ptb = pte_page_map_nested(oldpage, base);
- if (vma->vm_start > addr)
- addr = vma->vm_start;
+ if (page_end <= vma->vm_start)
+ vma = NULL;
- if (vma->vm_end < page_end)
- end = vma->vm_end;
- else
- end = page_end;
+ if (vma) {
+ if (vma->vm_start > addr)
+ addr = vma->vm_start;
+
+ if (vma->vm_end < page_end)
+ end = vma->vm_end;
+ else
+ end = page_end;
+ } else {
+ addr = end = page_end;
+ }
do {
- unsigned int cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
+ unsigned int cow = 0;
pte_t *src_pte = src_ptb + __pte_offset(addr);
pte_t *dst_pte = dst_ptb + __pte_offset(addr);
+ if (vma)
+ cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
+
do {
pte_t pte = *src_pte;
struct page *page;
@@ -637,9 +645,71 @@
}
#endif
-static void zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size)
+#ifdef CONFIG_SHAREPTE
+static inline void unmap_shared_pmd(struct mm_struct *mm, pgd_t *pgd,
+ unsigned long address, unsigned long end)
{
struct page *ptepage;
+ pmd_t * pmd;
+
+ if (pgd_none(*pgd))
+ return;
+ if (pgd_bad(*pgd)) {
+ pgd_ERROR(*pgd);
+ pgd_clear(pgd);
+ return;
+ }
+ pmd = pmd_offset(pgd, address);
+ if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
+ end = ((address + PGDIR_SIZE) & PGDIR_MASK);
+ do {
+ if (pmd_none(*pmd))
+ goto skip_pmd;
+ if (pmd_bad(*pmd)) {
+ pmd_ERROR(*pmd);
+ pmd_clear(pmd);
+ goto skip_pmd;
+ }
+
+ ptepage = pmd_page(*pmd);
+ pte_page_lock(ptepage);
+
+ if (page_count(ptepage) > 1) {
+ if ((address <= ptepage->index) &&
+ (end >= (ptepage->index + PMD_SIZE))) {
+ pmd_clear(pmd);
+ pgtable_remove_rmap_locked(ptepage, mm);
+ mm->rss -= ptepage->private;
+ put_page(ptepage);
+ } else {
+ pte_unshare(mm, pmd, address);
+ ptepage = pmd_page(*pmd);
+ }
+ }
+ pte_page_unlock(ptepage);
+skip_pmd:
+ address = (address + PMD_SIZE) & PMD_MASK;
+ pmd++;
+ } while (address < end);
+}
+
+void unmap_shared_range(struct mm_struct *mm, unsigned long address, unsigned long end)
+{
+ pgd_t * pgd;
+
+ if (address >= end)
+ BUG();
+ pgd = pgd_offset(mm, address);
+ do {
+ unmap_shared_pmd(mm, pgd, address, end);
+ address = (address + PGDIR_SIZE) & PGDIR_MASK;
+ pgd++;
+ } while (address && (address < end));
+}
+#endif
+
+static void zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size)
+{
unsigned long offset;
pte_t *ptep;
@@ -656,29 +726,7 @@
size = PMD_SIZE - offset;
size &= PAGE_MASK;
- /*
- * Check to see if the pte page is shared. If it is and we're unmapping
- * the entire page, just decrement the reference count and we're done.
- * If we're only unmapping part of the page we'll have to unshare it the
- * slow way.
- */
- ptepage = pmd_page(*pmd);
- pte_page_lock(ptepage);
-#ifdef CONFIG_SHAREPTE
- if (page_count(ptepage) > 1) {
- if ((offset == 0) && (size == PMD_SIZE)) {
- pmd_clear(pmd);
- pgtable_remove_rmap_locked(ptepage, tlb->mm);
- tlb->mm->rss -= ptepage->private;
- put_page(ptepage);
- pte_page_unlock(ptepage);
- return;
- }
- ptep = pte_unshare(tlb->mm, pmd, address);
- ptepage = pmd_page(*pmd);
- } else
-#endif
- ptep = pte_offset_map(pmd, address);
+ ptep = pte_offset_map(pmd, address);
for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) {
pte_t pte = *ptep;
@@ -707,12 +755,12 @@
pte_clear(ptep);
}
}
- pte_page_unlock(ptepage);
pte_unmap(ptep-1);
}
static void zap_pmd_range(mmu_gather_t *tlb, pgd_t * dir, unsigned long address, unsigned long size)
{
+ struct page *ptepage;
pmd_t * pmd;
unsigned long end;
@@ -728,7 +776,14 @@
if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
end = ((address + PGDIR_SIZE) & PGDIR_MASK);
do {
+ ptepage = pmd_page(*pmd);
+ pte_page_lock(ptepage);
+#ifdef CONFIG_SHAREPTE
+ if (page_count(ptepage) > 1)
+ BUG();
+#endif
zap_pte_range(tlb, pmd, address, end - address);
+ pte_page_unlock(ptepage);
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
} while (address < end);
@@ -779,6 +834,9 @@
spin_lock(&mm->page_table_lock);
+#ifdef CONFIG_SHAREPTE
+ unmap_shared_range(mm, address, address + size);
+#endif
/*
* This was once a long-held spinlock. Now we break the
* work up into ZAP_BLOCK_SIZE units and relinquish the
@@ -803,19 +861,85 @@
spin_unlock(&mm->page_table_lock);
}
-void unmap_all_pages(mmu_gather_t *tlb, struct mm_struct *mm, unsigned long address, unsigned long end)
+void unmap_all_pages(struct mm_struct *mm)
{
- pgd_t * dir;
+ struct vm_area_struct *vma;
+ struct page *ptepage;
+ mmu_gather_t *tlb;
+ pgd_t *pgd;
+ pmd_t *pmd;
+ unsigned long address;
+ unsigned long end;
- if (address >= end)
- BUG();
- dir = pgd_offset(mm, address);
+ tlb = tlb_gather_mmu(mm, 1);
+
+ vma = mm->mmap;
+ if (!vma)
+ goto out;
+
+ mm->map_count--;
+ if (is_vm_hugetlb_page(vma)) {
+ vma->vm_ops->close(vma);
+ goto next_vma;
+ }
+
+ address = vma->vm_start;
+ end = ((address + PGDIR_SIZE) & PGDIR_MASK);
+
+ pgd = pgd_offset(mm, address);
+ pmd = pmd_offset(pgd, address);
do {
- zap_pmd_range(tlb, dir, address, end - address);
- address = (address + PGDIR_SIZE) & PGDIR_MASK;
- dir++;
- } while (address && (address < end));
+ do {
+ if (pmd_none(*pmd))
+ goto skip_pmd;
+ if (pmd_bad(*pmd)) {
+ pmd_ERROR(*pmd);
+ pmd_clear(pmd);
+ goto skip_pmd;
+ }
+
+ ptepage = pmd_page(*pmd);
+ pte_page_lock(ptepage);
+ if (page_count(ptepage) > 1) {
+ pmd_clear(pmd);
+ pgtable_remove_rmap_locked(ptepage, mm);
+ mm->rss -= ptepage->private;
+ put_page(ptepage);
+ } else {
+ zap_pte_range(tlb, pmd, address, end - address);
+ }
+ pte_page_unlock(ptepage);
+skip_pmd:
+ pmd++;
+ address = (address + PMD_SIZE) & PMD_MASK;
+ if (address >= vma->vm_end) {
+next_vma:
+ vma = vma->vm_next;
+ if (!vma)
+ goto out;
+
+ mm->map_count--;
+ if (is_vm_hugetlb_page(vma)) {
+ vma->vm_ops->close(vma);
+ goto next_vma;
+ }
+
+ address = vma->vm_start;
+ end = ((address + PGDIR_SIZE) & PGDIR_MASK);
+ pgd = pgd_offset(mm, address);
+ pmd = pmd_offset(pgd, address);
+ }
+ } while (address < end);
+ pgd++;
+ pmd = pmd_offset(pgd, address);
+ end = ((address + PGDIR_SIZE) & PGDIR_MASK);
+ } while (vma);
+
+out:
+ clear_page_tables(tlb, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);
+ tlb_finish_mmu(tlb, 0, TASK_SIZE);
}
+
/*
* Do a quick page-table lookup for a single page.
* mm->page_table_lock must be held.
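To make the exit-path change concrete: exit_mmap() used to unmap each VMA
separately and then walk the page tables again to free them. The new
unmap_all_pages() makes one pass, advancing a pmd cursor alongside the
VMA list; each PMD-sized step either drops a reference (shared pte page)
or zaps the entries (private). A rough user-space sketch of that
single-pass shape, with toy types and no locking (the real walk in
mm/memory.c also handles hugetlb VMAs and pgd boundaries):

/* Toy single-pass teardown over a VMA list. The address rounding
 * mirrors (address + PMD_SIZE) & PMD_MASK from the patch; everything
 * else is an illustrative stand-in, not kernel API. */
#include <stdio.h>
#include <stddef.h>

#define TOY_PMD_SIZE 0x400000UL

struct toy_vma {
	unsigned long start, end;
	int shared_pte;		/* models page_count(ptepage) > 1 */
	struct toy_vma *next;
};

int main(void)
{
	struct toy_vma b = { 0x1000000UL, 0x1800000UL, 1, NULL };
	struct toy_vma a = { 0x0400000UL, 0x0800000UL, 0, &b };
	struct toy_vma *v;
	unsigned long addr;

	for (v = &a; v; v = v->next) {
		for (addr = v->start; addr < v->end;
		     addr = (addr + TOY_PMD_SIZE) & ~(TOY_PMD_SIZE - 1)) {
			if (v->shared_pte)
				printf("%#lx: shared pte page, put_page()\n",
				       addr);
			else
				printf("%#lx: private, zap_pte_range()\n",
				       addr);
		}
	}
	return 0;
}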