From: Nick Piggin <nickpiggin@yahoo.com.au>
To: Linus Torvalds <torvalds@osdl.org>
Cc: Andrew Morton <akpm@osdl.org>, Andi Kleen <ak@suse.de>,
Hugh Dickins <hugh@veritas.com>,
Linux Memory Management <linux-mm@kvack.org>
Subject: [PATCH 4/11] split copy_page_range
Date: Wed, 22 Dec 2004 20:54:59 +1100 [thread overview]
Message-ID: <41C94473.7050804@yahoo.com.au> (raw)
In-Reply-To: <41C94449.20004@yahoo.com.au>
[-- Attachment #1: Type: text/plain, Size: 5 bytes --]
4/11
[-- Attachment #2: 3level-split-copy_page_range.patch --]
[-- Type: text/plain, Size: 8698 bytes --]
Split copy_page_range into the usual set of page table walking functions.
This is needed to handle the complexity of moving to 4 levels of page tables.
Split out from Andi Kleen's 4level patch.
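
(For readers skimming the archive: below is a minimal, self-contained user-space
sketch of the walking pattern this patch introduces: one function per level, each
clamping `next' to the end of the range, skipping empty entries, and handing the
rest to the level below. It is not kernel code; the toy_* names, sizes and the
two-level layout are invented for illustration, and all locking, rmap and swap
handling is omitted.)

#include <stdio.h>
#include <stdlib.h>

#define TOY_PTRS_PER_PGD 4
#define TOY_PTRS_PER_PTE 8
#define TOY_PAGE_SIZE    1UL
#define TOY_PGDIR_SIZE   (TOY_PTRS_PER_PTE * TOY_PAGE_SIZE)
#define TOY_PGDIR_MASK   (~(TOY_PGDIR_SIZE - 1))

struct toy_mm {
    long *pgd[TOY_PTRS_PER_PGD];       /* each entry points at a "pte" table */
};

/* Leaf level: copy the individual entries covering [addr, end). */
static int toy_copy_pte_range(struct toy_mm *dst, struct toy_mm *src,
                              unsigned long pgd_index,
                              unsigned long addr, unsigned long end)
{
    long *src_pte = src->pgd[pgd_index];
    long *dst_pte;

    if (!dst->pgd[pgd_index]) {            /* the child's "pte_alloc" */
        dst->pgd[pgd_index] = calloc(TOY_PTRS_PER_PTE, sizeof(long));
        if (!dst->pgd[pgd_index])
            return -1;                     /* -ENOMEM in the real code */
    }
    dst_pte = dst->pgd[pgd_index];

    for (; addr < end; addr += TOY_PAGE_SIZE) {
        /* TOY_PAGE_SIZE is 1, so the offset within the table is the index */
        unsigned long i = addr % TOY_PGDIR_SIZE;
        if (src_pte[i] == 0)               /* "pte_none": nothing to copy */
            continue;
        dst_pte[i] = src_pte[i];           /* stand-in for copy_one_pte() */
    }
    return 0;
}

/* Top level: carve [start, end) into per-table chunks, walk downwards. */
static int toy_copy_page_range(struct toy_mm *dst, struct toy_mm *src,
                               unsigned long start, unsigned long end)
{
    unsigned long addr = start, next;
    int err = 0;

    while (addr < end) {
        next = (addr + TOY_PGDIR_SIZE) & TOY_PGDIR_MASK;
        if (next > end || next <= addr)    /* clamp to the range end */
            next = end;
        if (src->pgd[addr / TOY_PGDIR_SIZE])   /* skip empty "pgd" slots */
            err = toy_copy_pte_range(dst, src, addr / TOY_PGDIR_SIZE,
                                     addr, next);
        if (err)
            break;
        addr = next;
    }
    return err;
}

int main(void)
{
    struct toy_mm src = { { NULL } }, dst = { { NULL } };

    src.pgd[1] = calloc(TOY_PTRS_PER_PTE, sizeof(long));
    if (!src.pgd[1])
        return 1;
    src.pgd[1][3] = 42;                    /* one populated "pte" */

    if (toy_copy_page_range(&dst, &src, 8, 16) == 0)
        printf("copied entry: %ld\n", dst.pgd[1][3]);
    return 0;
}

The patch below does the same restructuring for the real pgd/pmd/pte levels,
keeping the page_table_lock, rmap and swap details in the new copy_one_pte()
and copy_swap_pte() helpers, so that adding another level later only means
adding one more walker function.
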
Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
---
linux-2.6-npiggin/mm/memory.c | 290 ++++++++++++++++++++++--------------------
1 files changed, 152 insertions(+), 138 deletions(-)
diff -puN mm/memory.c~3level-split-copy_page_range mm/memory.c
--- linux-2.6/mm/memory.c~3level-split-copy_page_range 2004-12-22 20:31:44.000000000 +1100
+++ linux-2.6-npiggin/mm/memory.c 2004-12-22 20:35:58.000000000 +1100
@@ -204,165 +204,179 @@ pte_t fastcall * pte_alloc_kernel(struct
out:
return pte_offset_kernel(pmd, address);
}
-#define PTE_TABLE_MASK ((PTRS_PER_PTE-1) * sizeof(pte_t))
-#define PMD_TABLE_MASK ((PTRS_PER_PMD-1) * sizeof(pmd_t))
/*
* copy one vm_area from one task to the other. Assumes the page tables
* already present in the new task to be cleared in the whole range
* covered by this vma.
*
- * 08Jan98 Merged into one routine from several inline routines to reduce
- * variable count and make things faster. -jj
- *
* dst->page_table_lock is held on entry and exit,
- * but may be dropped within pmd_alloc() and pte_alloc_map().
+ * but may be dropped within p[mg]d_alloc() and pte_alloc_map().
*/
+
+static inline void
+copy_swap_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte_t pte)
+{
+ if (pte_file(pte))
+ return;
+ swap_duplicate(pte_to_swp_entry(pte));
+ if (list_empty(&dst_mm->mmlist)) {
+ spin_lock(&mmlist_lock);
+ list_add(&dst_mm->mmlist, &src_mm->mmlist);
+ spin_unlock(&mmlist_lock);
+ }
+}
+
+static inline void
+copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+ pte_t *dst_pte, pte_t *src_pte, unsigned long vm_flags,
+ unsigned long addr)
+{
+ pte_t pte = *src_pte;
+ struct page *page;
+ unsigned long pfn;
+
+ /* pte contains position in swap, so copy. */
+ if (!pte_present(pte)) {
+ copy_swap_pte(dst_mm, src_mm, pte);
+ set_pte(dst_pte, pte);
+ return;
+ }
+ pfn = pte_pfn(pte);
+ /* the pte points outside of valid memory, the
+ * mapping is assumed to be good, meaningful
+ * and not mapped via rmap - duplicate the
+ * mapping as is.
+ */
+ page = NULL;
+ if (pfn_valid(pfn))
+ page = pfn_to_page(pfn);
+
+ if (!page || PageReserved(page)) {
+ set_pte(dst_pte, pte);
+ return;
+ }
+
+ /*
+ * If it's a COW mapping, write protect it both
+ * in the parent and the child
+ */
+ if ((vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) {
+ ptep_set_wrprotect(src_pte);
+ pte = *src_pte;
+ }
+
+ /*
+ * If it's a shared mapping, mark it clean in
+ * the child
+ */
+ if (vm_flags & VM_SHARED)
+ pte = pte_mkclean(pte);
+ pte = pte_mkold(pte);
+ get_page(page);
+ dst_mm->rss++;
+ if (PageAnon(page))
+ dst_mm->anon_rss++;
+ set_pte(dst_pte, pte);
+ page_dup_rmap(page);
+}
+
+static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+ pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end)
+{
+ pte_t *src_pte, *dst_pte;
+ pte_t *s, *d;
+ unsigned long vm_flags = vma->vm_flags;
+
+ d = dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr);
+ if (!dst_pte)
+ return -ENOMEM;
+
+ spin_lock(&src_mm->page_table_lock);
+ s = src_pte = pte_offset_map_nested(src_pmd, addr);
+ for (; addr < end; addr += PAGE_SIZE, s++, d++) {
+ if (pte_none(*s))
+ continue;
+ copy_one_pte(dst_mm, src_mm, d, s, vm_flags, addr);
+ }
+ pte_unmap_nested(src_pte);
+ pte_unmap(dst_pte);
+ spin_unlock(&src_mm->page_table_lock);
+ cond_resched_lock(&dst_mm->page_table_lock);
+ return 0;
+}
+
+static int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+ pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end)
+{
+ pmd_t *src_pmd, *dst_pmd;
+ int err = 0;
+ unsigned long next;
+
+ src_pmd = pmd_offset(src_pgd, addr);
+ dst_pmd = pmd_alloc(dst_mm, dst_pgd, addr);
+ if (!dst_pmd)
+ return -ENOMEM;
+
+ for (; addr < end; addr = next, src_pmd++, dst_pmd++) {
+ next = (addr + PMD_SIZE) & PMD_MASK;
+ if (next > end)
+ next = end;
+ if (pmd_none(*src_pmd))
+ continue;
+ if (pmd_bad(*src_pmd)) {
+ pmd_ERROR(*src_pmd);
+ pmd_clear(src_pmd);
+ continue;
+ }
+ err = copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
+ vma, addr, next);
+ if (err)
+ break;
+ }
+ return err;
+}
+
int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
- struct vm_area_struct *vma)
+ struct vm_area_struct *vma)
{
- pgd_t * src_pgd, * dst_pgd;
- unsigned long address = vma->vm_start;
- unsigned long end = vma->vm_end;
- unsigned long cow;
+ pgd_t *src_pgd, *dst_pgd;
+ unsigned long addr, start, end, next;
+ int err = 0;
if (is_vm_hugetlb_page(vma))
return copy_hugetlb_page_range(dst, src, vma);
- cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
- src_pgd = pgd_offset(src, address)-1;
- dst_pgd = pgd_offset(dst, address)-1;
-
- for (;;) {
- pmd_t * src_pmd, * dst_pmd;
-
- src_pgd++; dst_pgd++;
-
- /* copy_pmd_range */
-
+ start = vma->vm_start;
+ src_pgd = pgd_offset(src, start);
+ dst_pgd = pgd_offset(dst, start);
+
+ end = vma->vm_end;
+ addr = start;
+ while (addr && (addr < end-1)) {
+ next = (addr + PGDIR_SIZE) & PGDIR_MASK;
+ if (next > end || next <= addr)
+ next = end;
if (pgd_none(*src_pgd))
- goto skip_copy_pmd_range;
- if (unlikely(pgd_bad(*src_pgd))) {
+ continue;
+ if (pgd_bad(*src_pgd)) {
pgd_ERROR(*src_pgd);
pgd_clear(src_pgd);
-skip_copy_pmd_range: address = (address + PGDIR_SIZE) & PGDIR_MASK;
- if (!address || (address >= end))
- goto out;
continue;
}
+ err = copy_pmd_range(dst, src, dst_pgd, src_pgd,
+ vma, addr, next);
+ if (err)
+ break;
- src_pmd = pmd_offset(src_pgd, address);
- dst_pmd = pmd_alloc(dst, dst_pgd, address);
- if (!dst_pmd)
- goto nomem;
-
- do {
- pte_t * src_pte, * dst_pte;
-
- /* copy_pte_range */
-
- if (pmd_none(*src_pmd))
- goto skip_copy_pte_range;
- if (unlikely(pmd_bad(*src_pmd))) {
- pmd_ERROR(*src_pmd);
- pmd_clear(src_pmd);
-skip_copy_pte_range:
- address = (address + PMD_SIZE) & PMD_MASK;
- if (address >= end)
- goto out;
- goto cont_copy_pmd_range;
- }
-
- dst_pte = pte_alloc_map(dst, dst_pmd, address);
- if (!dst_pte)
- goto nomem;
- spin_lock(&src->page_table_lock);
- src_pte = pte_offset_map_nested(src_pmd, address);
- do {
- pte_t pte = *src_pte;
- struct page *page;
- unsigned long pfn;
-
- /* copy_one_pte */
-
- if (pte_none(pte))
- goto cont_copy_pte_range_noset;
- /* pte contains position in swap, so copy. */
- if (!pte_present(pte)) {
- if (!pte_file(pte)) {
- swap_duplicate(pte_to_swp_entry(pte));
- if (list_empty(&dst->mmlist)) {
- spin_lock(&mmlist_lock);
- list_add(&dst->mmlist,
- &src->mmlist);
- spin_unlock(&mmlist_lock);
- }
- }
- set_pte(dst_pte, pte);
- goto cont_copy_pte_range_noset;
- }
- pfn = pte_pfn(pte);
- /* the pte points outside of valid memory, the
- * mapping is assumed to be good, meaningful
- * and not mapped via rmap - duplicate the
- * mapping as is.
- */
- page = NULL;
- if (pfn_valid(pfn))
- page = pfn_to_page(pfn);
-
- if (!page || PageReserved(page)) {
- set_pte(dst_pte, pte);
- goto cont_copy_pte_range_noset;
- }
-
- /*
- * If it's a COW mapping, write protect it both
- * in the parent and the child
- */
- if (cow) {
- ptep_set_wrprotect(src_pte);
- pte = *src_pte;
- }
-
- /*
- * If it's a shared mapping, mark it clean in
- * the child
- */
- if (vma->vm_flags & VM_SHARED)
- pte = pte_mkclean(pte);
- pte = pte_mkold(pte);
- get_page(page);
- dst->rss++;
- if (PageAnon(page))
- dst->anon_rss++;
- set_pte(dst_pte, pte);
- page_dup_rmap(page);
-cont_copy_pte_range_noset:
- address += PAGE_SIZE;
- if (address >= end) {
- pte_unmap_nested(src_pte);
- pte_unmap(dst_pte);
- goto out_unlock;
- }
- src_pte++;
- dst_pte++;
- } while ((unsigned long)src_pte & PTE_TABLE_MASK);
- pte_unmap_nested(src_pte-1);
- pte_unmap(dst_pte-1);
- spin_unlock(&src->page_table_lock);
- cond_resched_lock(&dst->page_table_lock);
-cont_copy_pmd_range:
- src_pmd++;
- dst_pmd++;
- } while ((unsigned long)src_pmd & PMD_TABLE_MASK);
+ src_pgd++;
+ dst_pgd++;
+ addr = next;
}
-out_unlock:
- spin_unlock(&src->page_table_lock);
-out:
- return 0;
-nomem:
- return -ENOMEM;
+
+ return err;
}
static void zap_pte_range(struct mmu_gather *tlb,
_
Thread overview: 13+ messages
2004-12-22 9:50 [PATCH 0/11] alternate 4-level page tables patches (take 2) Nick Piggin
2004-12-22 9:52 ` [PATCH 1/11] parentheses to x86-64 macro Nick Piggin
2004-12-22 9:53 ` [PATCH 2/11] generic 3-level nopmd folding header Nick Piggin
2004-12-22 9:54 ` [PATCH 3/11] convert i386 to generic nopmd header Nick Piggin
2004-12-22 9:54 ` Nick Piggin [this message]
2004-12-22 9:55 ` [PATCH 5/11] replace clear_page_tables with clear_page_range Nick Piggin
2004-12-22 9:56 ` [PATCH 6/11] introduce 4-level nopud folding header Nick Piggin
2004-12-22 9:57 ` [PATCH 7/11] convert Linux to 4-level page tables Nick Piggin
2004-12-22 9:59 ` [PATCH 8/11] introduce fallback header Nick Piggin
2004-12-22 10:00 ` [PATCH 9/11] convert i386 to generic nopud header Nick Piggin
2004-12-22 10:00 ` [PATCH 10/11] convert ia64 " Nick Piggin
2004-12-22 10:01 ` [PATCH 11/11] convert x86_64 to 4 level page tables Nick Piggin
2004-12-22 10:18 ` [PATCH 0/11] alternate 4-level page tables patches (take 2) Andi Kleen