From: Andrew Morton <akpm@digeo.com>
To: Dave McCracken <dmccr@us.ibm.com>
Cc: Linux Memory Management <linux-mm@kvack.org>
Subject: Re: [PATCH 2.5.42-mm3] More shared page table fixes
Date: Tue, 15 Oct 2002 23:07:11 -0700
Message-ID: <3DAD020F.1AC3D34F@digeo.com>
In-Reply-To: <75990000.1034696450@baldur.austin.ibm.com>
Dave McCracken wrote:
>
> This patch gets the unmap_all_pages function right for PAE-enabled
> machines. It also adds a forgotten spinlock to pte_try_to_share.
>
This one's bad, Dave. Machine keels over halfway through the first
dbench run with CONFIG_SHAREPTE=n. General memory corruption and
mayhem.
Here's what I have - I'll drop it in experimental/ if I manage to get
another patchset ready tonight.
mm/memory.c | 166 +++++++++++++++++++++++++++++++++++++-----------------------
1 files changed, 103 insertions(+), 63 deletions(-)
--- 2.5.42/mm/memory.c~shpte-unmap_all_pages_fix Tue Oct 15 14:18:42 2002
+++ 2.5.42-akpm/mm/memory.c Tue Oct 15 14:18:42 2002
@@ -372,6 +372,7 @@ static pte_t *pte_try_to_share(struct mm
struct vm_area_struct *lvma;
struct page *ptepage;
unsigned long base;
+ pte_t *pte = NULL;
/*
* It already has a pte page. No point in checking further.
@@ -394,6 +395,8 @@ static pte_t *pte_try_to_share(struct mm
as = vma->vm_file->f_dentry->d_inode->i_mapping;
+ spin_lock(&as->i_shared_lock);
+
list_for_each_entry(lvma, &as->i_mmap_shared, shared) {
pgd_t *lpgd;
pmd_t *lpmd;
@@ -431,9 +434,11 @@ static pte_t *pte_try_to_share(struct mm
else
pmdval = pmd_wrprotect(*lpmd);
set_pmd(pmd, pmdval);
- return pte_page_map(ptepage, address);
+ pte = pte_page_map(ptepage, address);
+ break;
}
- return NULL;
+ spin_unlock(&as->i_shared_lock);
+ return pte;
}
#endif
@@ -846,14 +851,16 @@ static void zap_pmd_range(mmu_gather_t *
if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
end = ((address + PGDIR_SIZE) & PGDIR_MASK);
do {
- ptepage = pmd_page(*pmd);
- pte_page_lock(ptepage);
+ if (pmd_present(*pmd)) {
+ ptepage = pmd_page(*pmd);
+ pte_page_lock(ptepage);
#ifdef CONFIG_SHAREPTE
- if (page_count(ptepage) > 1)
- BUG();
+ if (page_count(ptepage) > 1)
+ BUG();
#endif
- zap_pte_range(tlb, pmd, address, end - address);
- pte_page_unlock(ptepage);
+ zap_pte_range(tlb, pmd, address, end - address);
+ pte_page_unlock(ptepage);
+ }
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
} while (address < end);
@@ -938,72 +945,105 @@ void unmap_all_pages(struct mm_struct *m
mmu_gather_t *tlb;
pgd_t *pgd;
pmd_t *pmd;
- unsigned long address;
- unsigned long end;
+ unsigned long address = 0;
+ unsigned long vm_end = 0, prev_end, pmd_end;
tlb = tlb_gather_mmu(mm, 1);
vma = mm->mmap;
- if (!vma)
- goto out;
-
- mm->map_count--;
- if (is_vm_hugetlb_page(vma)) {
- vma->vm_ops->close(vma);
- goto next_vma;
- }
-
- address = vma->vm_start;
- end = ((address + PGDIR_SIZE) & PGDIR_MASK);
+ for (;;) {
+ if (address >= vm_end) {
+ if (!vma)
+ goto out;
- pgd = pgd_offset(mm, address);
- pmd = pmd_offset(pgd, address);
- do {
- do {
- if (pmd_none(*pmd))
- goto skip_pmd;
- if (pmd_bad(*pmd)) {
- pmd_ERROR(*pmd);
- pmd_clear(pmd);
- goto skip_pmd;
- }
-
- ptepage = pmd_page(*pmd);
- pte_page_lock(ptepage);
- if (page_count(ptepage) > 1) {
- pmd_clear(pmd);
- pgtable_remove_rmap_locked(ptepage, mm);
- mm->rss -= ptepage->private;
- put_page(ptepage);
- } else {
- zap_pte_range(tlb, pmd, address, end - address);
- }
- pte_page_unlock(ptepage);
-skip_pmd:
- pmd++;
- address = (address + PMD_SIZE) & PMD_MASK;
- if (address >= vma->vm_end) {
+ address = vma->vm_start;
next_vma:
- vma = vma->vm_next;
- if (!vma)
- goto out;
-
- mm->map_count--;
- if (is_vm_hugetlb_page(vma)) {
+ prev_end = vm_end;
+ vm_end = vma->vm_end;
+ mm->map_count--;
+ /*
+ * Advance the vma pointer to the next vma.
+ * To facilitate coalescing adjacent vmas, the
+ * pointer always points to the next one
+ * beyond the range we're currently working
+ * on, which means vma will be null on the
+ * last iteration.
+ */
+ vma = vma->vm_next;
+ if (vma) {
+ /*
+ * Go ahead and include hugetlb vmas
+ * in the range we process. The pmd
+ * entry will be cleared by close, so
+ * we'll just skip over them. This is
+ * easier than trying to avoid them.
+ */
+ if (is_vm_hugetlb_page(vma))
vma->vm_ops->close(vma);
+
+ /*
+ * Coalesce adjacent vmas and process
+ * them all in one iteration.
+ */
+ if (vma->vm_start == prev_end) {
goto next_vma;
}
+ }
+ }
+ pgd = pgd_offset(mm, address);
+ do {
+ if (pgd_none(*pgd))
+ goto skip_pgd;
- address = vma->vm_start;
- end = ((address + PGDIR_SIZE) & PGDIR_MASK);
- pgd = pgd_offset(mm, address);
- pmd = pmd_offset(pgd, address);
+ if (pgd_bad(*pgd)) {
+ pgd_ERROR(*pgd);
+ pgd_clear(pgd);
+skip_pgd:
+ address += PGDIR_SIZE;
+ if (address > vm_end)
+ address = vm_end;
+ goto next_pgd;
}
- } while (address < end);
- pgd++;
- pmd = pmd_offset(pgd, address);
- end = ((address + PGDIR_SIZE) & PGDIR_MASK);
- } while (vma);
+ pmd = pmd_offset(pgd, address);
+ if (vm_end > ((address + PGDIR_SIZE) & PGDIR_MASK))
+ pmd_end = (address + PGDIR_SIZE) & PGDIR_MASK;
+ else
+ pmd_end = vm_end;
+
+ for (;;) {
+ if (pmd_none(*pmd))
+ goto next_pmd;
+ if (pmd_bad(*pmd)) {
+ pmd_ERROR(*pmd);
+ pmd_clear(pmd);
+ goto next_pmd;
+ }
+
+ ptepage = pmd_page(*pmd);
+ pte_page_lock(ptepage);
+ if (page_count(ptepage) > 1) {
+ pmd_clear(pmd);
+ pgtable_remove_rmap_locked(ptepage, mm);
+ mm->rss -= ptepage->private;
+ put_page(ptepage);
+ } else
+ zap_pte_range(tlb, pmd, address,
+ vm_end - address);
+
+ pte_page_unlock(ptepage);
+next_pmd:
+ address += PMD_SIZE;
+ if (address >= pmd_end) {
+ address = pmd_end;
+ break;
+ }
+ pmd++;
+ }
+next_pgd:
+ pgd++;
+ } while (address < vm_end);
+
+ }
out:
clear_page_tables(tlb, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);
.
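
A side note on the first hunk: the bug it fixes in pte_try_to_share is an
early return from inside the list walk, which would leave i_shared_lock held
forever. A minimal standalone C sketch of the single-exit idiom (toy linked
list and a pthread mutex here, purely illustrative, not the kernel's actual
types or locking API):

/* Sketch of the single-exit locking idiom used in the pte_try_to_share
 * hunk above: take the lock once, record the result, break out of the
 * walk, and unlock on the one exit path. Hypothetical example code. */
#include <pthread.h>
#include <stddef.h>

struct node {
	struct node *next;
	int key;
	void *payload;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Returning from inside the loop while holding list_lock would leak the
 * lock -- exactly the bug the patch fixes by replacing
 * "return pte_page_map(...)" with "pte = ...; break;". */
void *find_payload(struct node *head, int key)
{
	void *result = NULL;		/* like "pte_t *pte = NULL;" */
	struct node *n;

	pthread_mutex_lock(&list_lock);
	for (n = head; n != NULL; n = n->next) {
		if (n->key == key) {
			result = n->payload;	/* record, don't return */
			break;
		}
	}
	pthread_mutex_unlock(&list_lock);	/* single unlock site */
	return result;
}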
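The zap_pmd_range hunk is the same kind of defensive change: check that an
entry is present before dereferencing whatever it points at. Sketched in
isolation, with a toy array of pointers standing in for pmd entries and
hypothetical names throughout:

/* Sketch of the guard added in the zap_pmd_range hunk: test each slot
 * before touching what it points at, instead of dereferencing garbage
 * on empty entries. Toy code, not the kernel's page table walkers. */
#include <stddef.h>
#include <stdio.h>

#define ENTRIES 4

void zap_table(int *entries[ENTRIES])
{
	for (size_t i = 0; i < ENTRIES; i++) {
		if (entries[i] == NULL)	/* like pmd_present() failing */
			continue;	/* skip empty slots entirely */
		printf("zapping entry %zu -> %d\n", i, *entries[i]);
		entries[i] = NULL;	/* roughly a pmd_clear() */
	}
}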
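And the unmap_all_pages rewrite hinges on the coalescing logic described in
its comments: the walk always advances the vma pointer past the range being
worked on, and merges runs of adjacent vmas so each run is torn down in one
pass over the page tables. A sketch of just that merging loop, with toy
range structs rather than real vmas:

/* Sketch of the vma-coalescing walk in the rewritten unmap_all_pages():
 * merge runs of adjacent [start,end) ranges so each run is processed in
 * a single pass. Toy types and names, purely illustrative. */
#include <stdio.h>

struct range {
	unsigned long start, end;
	struct range *next;
};

static void process_run(unsigned long start, unsigned long end)
{
	printf("tearing down [%#lx, %#lx)\n", start, end);
}

void walk_coalesced(struct range *r)
{
	while (r) {
		unsigned long start = r->start;
		unsigned long end = r->end;

		/* Advance past the current range, then keep extending
		 * the run while the next range starts exactly where this
		 * one ended -- mirroring the "vma->vm_start == prev_end"
		 * test in the patch. */
		r = r->next;
		while (r && r->start == end) {
			end = r->end;
			r = r->next;
		}
		process_run(start, end);
	}
}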