From: Paul Davies <pauld@gelato.unsw.edu.au>
To: linux-mm@kvack.org
Cc: Paul Davies <pauld@gelato.unsw.edu.au>
Subject: [PATCH 27/29] Abstract implementation dependent code for mremap
Date: Sat, 13 Jan 2007 13:48:03 +1100 [thread overview]
Message-ID: <20070113024803.29682.7531.sendpatchset@weill.orchestra.cse.unsw.EDU.AU> (raw)
In-Reply-To: <20070113024540.29682.27024.sendpatchset@weill.orchestra.cse.unsw.EDU.AU>
PATCH 27
* Moved implementation-dependent page table code from mremap.c to
pt-default.c. move_page_tables() is now part of the page table interface.
* Added partial page table walk helpers (lookup_pmd() and build_pmd()) to
pt-default-mm.h to facilitate the abstraction of the page-table-dependent code.
Signed-off-by: Paul Davies <pauld@gelato.unsw.edu.au>
---
include/linux/pt-default-mm.h | 49 +++++++++++++++
mm/mremap.c | 133 ------------------------------------------
mm/pt-default.c | 90 ++++++++++++++++++++++++++++
3 files changed, 140 insertions(+), 132 deletions(-)
Index: linux-2.6.20-rc4/mm/mremap.c
===================================================================
--- linux-2.6.20-rc4.orig/mm/mremap.c 2007-01-11 12:40:58.728788000 +1100
+++ linux-2.6.20-rc4/mm/mremap.c 2007-01-11 12:41:42.240788000 +1100
@@ -18,143 +18,12 @@
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/syscalls.h>
+#include <linux/pt.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
-static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
-
- pgd = pgd_offset(mm, addr);
- if (pgd_none_or_clear_bad(pgd))
- return NULL;
-
- pud = pud_offset(pgd, addr);
- if (pud_none_or_clear_bad(pud))
- return NULL;
-
- pmd = pmd_offset(pud, addr);
- if (pmd_none_or_clear_bad(pmd))
- return NULL;
-
- return pmd;
-}
-
-static pmd_t *alloc_new_pmd(struct mm_struct *mm, unsigned long addr)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
-
- pgd = pgd_offset(mm, addr);
- pud = pud_alloc(mm, pgd, addr);
- if (!pud)
- return NULL;
-
- pmd = pmd_alloc(mm, pud, addr);
- if (!pmd)
- return NULL;
-
- if (!pmd_present(*pmd) && __pte_alloc(mm, pmd, addr))
- return NULL;
-
- return pmd;
-}
-
-static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
- unsigned long old_addr, unsigned long old_end,
- struct vm_area_struct *new_vma, pmd_t *new_pmd,
- unsigned long new_addr)
-{
- struct address_space *mapping = NULL;
- struct mm_struct *mm = vma->vm_mm;
- pte_t *old_pte, *new_pte, pte;
- spinlock_t *old_ptl, *new_ptl;
-
- if (vma->vm_file) {
- /*
- * Subtle point from Rajesh Venkatasubramanian: before
- * moving file-based ptes, we must lock vmtruncate out,
- * since it might clean the dst vma before the src vma,
- * and we propagate stale pages into the dst afterward.
- */
- mapping = vma->vm_file->f_mapping;
- spin_lock(&mapping->i_mmap_lock);
- if (new_vma->vm_truncate_count &&
- new_vma->vm_truncate_count != vma->vm_truncate_count)
- new_vma->vm_truncate_count = 0;
- }
-
- /*
- * We don't have to worry about the ordering of src and dst
- * pte locks because exclusive mmap_sem prevents deadlock.
- */
- old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
- new_pte = pte_offset_map_nested(new_pmd, new_addr);
- new_ptl = pte_lockptr(mm, new_pmd);
- if (new_ptl != old_ptl)
- spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
- arch_enter_lazy_mmu_mode();
-
- for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
- new_pte++, new_addr += PAGE_SIZE) {
- if (pte_none(*old_pte))
- continue;
- pte = ptep_clear_flush(vma, old_addr, old_pte);
- /* ZERO_PAGE can be dependant on virtual addr */
- pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
- set_pte_at(mm, new_addr, new_pte, pte);
- }
-
- arch_leave_lazy_mmu_mode();
- if (new_ptl != old_ptl)
- spin_unlock(new_ptl);
- pte_unmap_nested(new_pte - 1);
- pte_unmap_unlock(old_pte - 1, old_ptl);
- if (mapping)
- spin_unlock(&mapping->i_mmap_lock);
-}
-
-#define LATENCY_LIMIT (64 * PAGE_SIZE)
-
-static unsigned long move_page_tables(struct vm_area_struct *vma,
- unsigned long old_addr, struct vm_area_struct *new_vma,
- unsigned long new_addr, unsigned long len)
-{
- unsigned long extent, next, old_end;
- pmd_t *old_pmd, *new_pmd;
-
- old_end = old_addr + len;
- flush_cache_range(vma, old_addr, old_end);
-
- for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
- cond_resched();
- next = (old_addr + PMD_SIZE) & PMD_MASK;
- if (next - 1 > old_end)
- next = old_end;
- extent = next - old_addr;
- old_pmd = get_old_pmd(vma->vm_mm, old_addr);
- if (!old_pmd)
- continue;
- new_pmd = alloc_new_pmd(vma->vm_mm, new_addr);
- if (!new_pmd)
- break;
- next = (new_addr + PMD_SIZE) & PMD_MASK;
- if (extent > next - new_addr)
- extent = next - new_addr;
- if (extent > LATENCY_LIMIT)
- extent = LATENCY_LIMIT;
- move_ptes(vma, old_pmd, old_addr, old_addr + extent,
- new_vma, new_pmd, new_addr);
- }
-
- return len + old_addr - old_end; /* how much done */
-}
-
static unsigned long move_vma(struct vm_area_struct *vma,
unsigned long old_addr, unsigned long old_len,
unsigned long new_len, unsigned long new_addr)
Index: linux-2.6.20-rc4/mm/pt-default.c
===================================================================
--- linux-2.6.20-rc4.orig/mm/pt-default.c 2007-01-11 12:40:58.728788000 +1100
+++ linux-2.6.20-rc4/mm/pt-default.c 2007-01-11 12:41:42.240788000 +1100
@@ -1058,3 +1058,93 @@
}
#endif
+
+static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
+ unsigned long old_addr, unsigned long old_end,
+ struct vm_area_struct *new_vma, pmd_t *new_pmd,
+ unsigned long new_addr)
+{
+ struct address_space *mapping = NULL;
+ struct mm_struct *mm = vma->vm_mm;
+ pte_t *old_pte, *new_pte, pte;
+ spinlock_t *old_ptl, *new_ptl;
+
+ if (vma->vm_file) {
+ /*
+ * Subtle point from Rajesh Venkatasubramanian: before
+ * moving file-based ptes, we must lock vmtruncate out,
+ * since it might clean the dst vma before the src vma,
+ * and we propagate stale pages into the dst afterward.
+ */
+ mapping = vma->vm_file->f_mapping;
+ spin_lock(&mapping->i_mmap_lock);
+ if (new_vma->vm_truncate_count &&
+ new_vma->vm_truncate_count != vma->vm_truncate_count)
+ new_vma->vm_truncate_count = 0;
+ }
+
+ /*
+ * We don't have to worry about the ordering of src and dst
+ * pte locks because exclusive mmap_sem prevents deadlock.
+ */
+ old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
+ new_pte = pte_offset_map_nested(new_pmd, new_addr);
+ new_ptl = pte_lockptr(mm, new_pmd);
+ if (new_ptl != old_ptl)
+ spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+ arch_enter_lazy_mmu_mode();
+
+ for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
+ new_pte++, new_addr += PAGE_SIZE) {
+ if (pte_none(*old_pte))
+ continue;
+ pte = ptep_clear_flush(vma, old_addr, old_pte);
+ /* ZERO_PAGE can be dependant on virtual addr */
+ pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
+ set_pte_at(mm, new_addr, new_pte, pte);
+ }
+
+ arch_leave_lazy_mmu_mode();
+ if (new_ptl != old_ptl)
+ spin_unlock(new_ptl);
+ pte_unmap_nested(new_pte - 1);
+ pte_unmap_unlock(old_pte - 1, old_ptl);
+ if (mapping)
+ spin_unlock(&mapping->i_mmap_lock);
+}
+
+#define LATENCY_LIMIT (64 * PAGE_SIZE)
+
+unsigned long move_page_tables(struct vm_area_struct *vma,
+ unsigned long old_addr, struct vm_area_struct *new_vma,
+ unsigned long new_addr, unsigned long len)
+{
+ unsigned long extent, next, old_end;
+ pmd_t *old_pmd, *new_pmd;
+
+ old_end = old_addr + len;
+ flush_cache_range(vma, old_addr, old_end);
+
+ for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
+ cond_resched();
+ next = (old_addr + PMD_SIZE) & PMD_MASK;
+ if (next - 1 > old_end)
+ next = old_end;
+ extent = next - old_addr;
+ old_pmd = lookup_pmd(vma->vm_mm, old_addr);
+ if (!old_pmd)
+ continue;
+ new_pmd = build_pmd(vma->vm_mm, new_addr);
+ if (!new_pmd)
+ break;
+ next = (new_addr + PMD_SIZE) & PMD_MASK;
+ if (extent > next - new_addr)
+ extent = next - new_addr;
+ if (extent > LATENCY_LIMIT)
+ extent = LATENCY_LIMIT;
+ move_ptes(vma, old_pmd, old_addr, old_addr + extent,
+ new_vma, new_pmd, new_addr);
+ }
+
+ return len + old_addr - old_end; /* how much done */
+}
Index: linux-2.6.20-rc4/include/linux/pt-default-mm.h
===================================================================
--- linux-2.6.20-rc4.orig/include/linux/pt-default-mm.h 2007-01-11 12:40:58.752788000 +1100
+++ linux-2.6.20-rc4/include/linux/pt-default-mm.h 2007-01-11 12:41:42.268788000 +1100
@@ -72,5 +72,54 @@
((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
NULL: pte_offset_kernel(pmd, address))
+static inline pmd_t *lookup_pmd(struct mm_struct *mm, unsigned long address)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ if (mm!=&init_mm) { /* Look up user page table */
+ pgd = pgd_offset(mm, address);
+ if (pgd_none_or_clear_bad(pgd))
+ return NULL;
+ } else { /* Look up kernel page table */
+ pgd = pgd_offset_k(address);
+ if (pgd_none_or_clear_bad(pgd))
+ return NULL;
+ }
+
+ pud = pud_offset(pgd, address);
+ if (pud_none_or_clear_bad(pud)) {
+ return NULL;
+ }
+
+ pmd = pmd_offset(pud, address);
+ if (pmd_none_or_clear_bad(pmd)) {
+ return NULL;
+ }
+
+ return pmd;
+}
+
+static inline pmd_t *build_pmd(struct mm_struct *mm, unsigned long addr)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pgd = pgd_offset(mm, addr);
+ pud = pud_alloc(mm, pgd, addr);
+ if (!pud)
+ return NULL;
+
+ pmd = pmd_alloc(mm, pud, addr);
+ if (!pmd)
+ return NULL;
+
+ if (!pmd_present(*pmd) && __pte_alloc(mm, pmd, addr))
+ return NULL;
+
+ return pmd;
+}
#endif
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
next prev parent reply other threads:[~2007-01-13 2:48 UTC|newest]
Thread overview: 60+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-01-13 2:45 [PATCH 0/29] Page Table Interface Explanation Paul Davies
2007-01-13 2:45 ` [PATCH 1/29] Abstract current page table implementation Paul Davies
2007-01-13 2:45 ` [PATCH 2/29] " Paul Davies
2007-01-13 2:45 ` [PATCH 3/29] " Paul Davies
2007-01-16 18:55 ` Christoph Lameter
2007-01-13 2:46 ` [PATCH 4/29] Introduce Page Table Interface (PTI) Paul Davies
2007-01-16 19:02 ` Christoph Lameter
2007-01-13 2:46 ` [PATCH 5/29] Start calling simple PTI functions Paul Davies
2007-01-16 19:04 ` Christoph Lameter
2007-01-18 6:43 ` Paul Cameron Davies
2007-01-13 2:46 ` [PATCH 6/29] Tweak IA64 arch dependent files to work with PTI Paul Davies
2007-01-16 19:05 ` Christoph Lameter
2007-01-13 2:46 ` [PATCH 7/29] Continue calling simple PTI functions Paul Davies
2007-01-16 19:08 ` Christoph Lameter
2007-01-13 2:46 ` [PATCH 8/29] Clean up page fault handers Paul Davies
2007-01-13 2:46 ` [PATCH 9/29] Clean up page fault handlers Paul Davies
2007-01-13 2:46 ` [PATCH 10/29] Call simple PTI functions Paul Davies
2007-01-13 2:46 ` [PATCH 11/29] Call simple PTI functions cont Paul Davies
2007-01-13 2:46 ` [PATCH 12/29] Abstract page table tear down Paul Davies
2007-01-13 2:46 ` [PATCH 13/29] Finish abstracting " Paul Davies
2007-01-13 2:46 ` [PATCH 14/29] Abstract copy page range iterator Paul Davies
2007-01-13 2:46 ` [PATCH 15/29] Finish abstracting copy page range Paul Davies
2007-01-13 2:47 ` [PATCH 16/29] Abstract unmap page range iterator Paul Davies
2007-01-13 2:47 ` [PATCH 17/29] Finish abstracting unmap page range Paul Davies
2007-01-13 2:47 ` [PATCH 18/29] Abstract zeromap " Paul Davies
2007-01-13 2:47 ` [PATCH 19/29] Abstract remap pfn range Paul Davies
2007-01-13 2:47 ` [PATCH 20/29] Abstract change protection iterator Paul Davies
2007-01-13 2:47 ` [PATCH 21/29] Abstract unmap vm area Paul Davies
2007-01-13 2:47 ` [PATCH 22/29] Abstract map " Paul Davies
2007-01-13 2:47 ` [PATCH 23/29] Abstract unuse_vma Paul Davies
2007-01-13 2:47 ` [PATCH 24/29] Abstract smaps iterator Paul Davies
2007-01-13 2:47 ` [PATCH 25/29] Abstact mempolicy iterator Paul Davies
2007-01-13 2:47 ` [PATCH 26/29] Abstract mempolicy iterator cont Paul Davies
2007-01-13 2:48 ` Paul Davies [this message]
2007-01-13 2:48 ` [PATCH 28/29] Abstract ioremap iterator Paul Davies
2007-01-13 2:48 ` [PATCH 29/29] Tweak i386 arch dependent files to work with PTI Paul Davies
2007-01-13 2:48 ` [PATCH 1/5] Introduce IA64 page table interface Paul Davies
2007-01-13 2:48 ` [PATCH 2/5] Abstract pgtable Paul Davies
2007-01-13 2:48 ` [PATCH 3/5] Abstact pgtable continued Paul Davies
2007-01-13 2:48 ` [PATCH 4/5] Abstract assembler lookup Paul Davies
2007-01-13 2:48 ` [PATCH 5/5] Abstract pgalloc Paul Davies
2007-01-13 2:48 ` [PATCH 1/12] Alternate page table implementation (GPT) Paul Davies
2007-01-13 2:48 ` [PATCH 2/12] Alternate page table implementation cont Paul Davies
2007-01-13 2:48 ` [PATCH 3/12] " Paul Davies
2007-01-13 2:49 ` [PATCH 4/12] " Paul Davies
2007-01-13 2:49 ` [PATCH 5/12] " Paul Davies
2007-01-13 2:49 ` [PATCH 6/12] " Paul Davies
2007-01-13 2:49 ` [PATCH 7/12] " Paul Davies
2007-01-13 2:49 ` [PATCH 8/12] " Paul Davies
2007-01-13 2:49 ` [PATCH 9/12] " Paul Davies
2007-01-13 2:49 ` [PATCH 10/12] " Paul Davies
2007-01-13 2:49 ` [PATCH 11/12] " Paul Davies
2007-01-13 2:49 ` [PATCH 12/12] " Paul Davies
2007-01-13 19:29 ` [PATCH 0/29] Page Table Interface Explanation Peter Zijlstra
2007-01-14 10:06 ` Paul Cameron Davies
2007-01-16 18:49 ` Christoph Lameter
2007-01-18 6:22 ` Paul Cameron Davies
2007-01-16 18:51 ` Christoph Lameter
2007-01-18 6:53 ` Paul Cameron Davies
2007-01-16 19:14 ` Christoph Lameter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070113024803.29682.7531.sendpatchset@weill.orchestra.cse.unsw.EDU.AU \
--to=pauld@gelato.unsw.edu.au \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.