* [PATCH 1/18] PTI - Introduce page table interface
From: Paul Davies @ 2006-07-13 4:26 UTC (permalink / raw)
To: linux-mm; +Cc: Paul Davies
This patch does the following:
1) Introduces include/linux/pt.h which contains the definitions
for the page table interface PTI.
2) Introduces a part of the default page table implementation
that is contained in include/linux/pt-default.h
Signed-Off-By: Paul Davies <pauld@gelato.unsw.edu.au>
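[Not part of the patch] As a rough usage sketch of the interface, assuming the default implementation below: a caller builds (or looks up) the page table for an address, keeps the returned pt_path_t, and later remaps and locks the pte through that saved path. The helper name example_touch_pte is made up for illustration.
/* Illustration only (hypothetical helper): build the page table for
 * @address, then remap and lock the pte through the saved path. */
static int example_touch_pte(struct mm_struct *mm, unsigned long address)
{
	pt_path_t pt_path;
	pte_t *pte;

	pte = build_page_table(mm, address, &pt_path);	/* allocates pud/pmd/pte page as needed */
	if (!pte)
		return -ENOMEM;
	pte_unmap(pte);

	pte = lookup_page_table_fast(mm, pt_path, address);	/* remap + take the pte lock */
	/* ... inspect or install *pte here ... */
	unlock_pte(mm, pt_path);
	pte_unmap(pte);
	return 0;
}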
---
pt-default.h | 166 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
pt.h | 126 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 292 insertions(+)
Index: linux-2.6.17.2/include/linux/pt.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.17.2/include/linux/pt.h 2006-07-08 22:29:10.827781552 +1000
@@ -0,0 +1,126 @@
+#ifndef _LINUX_PT_H
+#define _LINUX_PT_H 1
+
+#include <linux/pt-default.h>
+
+/* Page Table Interface */
+
+int create_user_page_table(struct mm_struct *mm);
+
+void destroy_user_page_table(struct mm_struct *mm);
+
+pte_t *build_page_table(struct mm_struct *mm,
+ unsigned long address, pt_path_t *pt_path);
+
+pte_t *lookup_page_table(struct mm_struct *mm,
+ unsigned long address, pt_path_t *pt_path);
+
+pte_t *lookup_gate_area(struct mm_struct *mm,
+ unsigned long pg);
+
+void coallesce_vmas(struct vm_area_struct **vma_p,
+ struct vm_area_struct **next_p);
+
+void free_page_table_range(struct mmu_gather **tlb,
+ unsigned long addr, unsigned long end,
+ unsigned long floor, unsigned long ceiling);
+
+/* memory.c iterators */
+int copy_dual_iterator(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+ unsigned long addr, unsigned long end, struct vm_area_struct *vma);
+
+unsigned long unmap_page_range_iterator(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, unsigned long addr, unsigned long end,
+ long *zap_work, struct zap_details *details);
+
+int zeromap_build_iterator(struct mm_struct *mm,
+ unsigned long addr, unsigned long end, pgprot_t prot);
+
+int remap_build_iterator(struct mm_struct *mm,
+ unsigned long addr, unsigned long end, unsigned long pfn,
+ pgprot_t prot);
+
+/* vmalloc.c iterators */
+
+void vunmap_read_iterator(unsigned long addr, unsigned long end);
+
+int vmap_build_iterator(unsigned long addr,
+ unsigned long end, pgprot_t prot, struct page ***pages);
+
+/* mprotect.c iterator */
+void change_protection_read_iterator(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end, pgprot_t newprot);
+
+/* msync.c iterator */
+unsigned long msync_read_iterator(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end);
+
+/* swapfile.c iterator */
+int unuse_vma_read_iterator(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end, swp_entry_t entry, struct page *page);
+
+/* smaps */
+
+void smaps_read_range(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end, struct mem_size_stats *mss);
+
+/* movepagetables */
+unsigned long move_page_tables(struct vm_area_struct *vma,
+ unsigned long old_addr, struct vm_area_struct *new_vma,
+ unsigned long new_addr, unsigned long len);
+
+/* mempolicy.c */
+int check_policy_read_iterator(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end,
+ const nodemask_t *nodes, unsigned long flags,
+ void *private);
+
+
+ /* Functions called by iterators in the PTI */
+void copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+ pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
+ unsigned long addr, int *rss);
+
+void zap_one_pte(pte_t *pte, struct mm_struct *mm, unsigned long addr,
+ struct vm_area_struct *vma, long *zap_work, struct zap_details *details,
+ struct mmu_gather *tlb, int *anon_rss, int* file_rss);
+
+void zeromap_one_pte(struct mm_struct *mm, pte_t *pte, unsigned long addr, pgprot_t prot);
+
+void remap_one_pte(struct mm_struct *mm, pte_t *pte, unsigned long addr,
+ unsigned long pfn, pgprot_t prot);
+
+void vunmap_one_pte(pte_t *pte, unsigned long address);
+
+int vmap_one_pte(pte_t *pte, unsigned long addr,
+ struct page ***pages, pgprot_t prot);
+
+void change_prot_pte(struct mm_struct *mm, pte_t *pte,
+ unsigned long address, pgprot_t newprot);
+
+int msync_one_pte(pte_t *pte, unsigned long address,
+ struct vm_area_struct *vma, unsigned long *ret);
+
+void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
+ unsigned long addr, swp_entry_t entry, struct page *page);
+
+void mremap_move_pte(struct vm_area_struct *vma,
+ struct vm_area_struct *new_vma, pte_t *old_pte, pte_t *new_pte,
+ unsigned long old_addr, unsigned long new_addr);
+
+void smaps_one_pte(struct vm_area_struct *vma, unsigned long addr, pte_t *pte,
+ struct mem_size_stats *mss);
+
+int mempolicy_check_one_pte(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *pte, const nodemask_t *nodes, unsigned long flags, void *private);
+
+
+static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
+{
+ if (file_rss)
+ add_mm_counter(mm, file_rss, file_rss);
+ if (anon_rss)
+ add_mm_counter(mm, anon_rss, anon_rss);
+}
+
+#endif
Index: linux-2.6.17.2/include/linux/pt-default.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.17.2/include/linux/pt-default.h 2006-07-08 22:30:34.907999416 +1000
@@ -0,0 +1,166 @@
+#ifndef _LINUX_PT_DEFAULT_H
+#define _LINUX_PT_DEFAULT_H 1
+
+#include <linux/rmap.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+
+#include <asm/pgalloc.h>
+
+typedef struct struct_pt_path { pmd_t *pmd; } pt_path_t;
+
+static inline int create_user_page_table(struct mm_struct * mm)
+{
+ mm->pt.pgd = pgd_alloc(mm);
+ if (unlikely(!mm->pt.pgd))
+ return -ENOMEM;
+ return 0;
+}
+
+static inline void destroy_user_page_table(struct mm_struct * mm)
+{
+ pgd_free(mm->pt.pgd);
+}
+
+/*
+ * This function builds the page table atomically and saves
+ * the partial path for a fast lookup later on.
+ */
+static inline pte_t *build_page_table(struct mm_struct *mm,
+ unsigned long address, pt_path_t *pt_path)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pgd = pgd_offset(mm, address);
+ pud = pud_alloc(mm, pgd, address);
+ if (!pud)
+ return NULL;
+ pmd = pmd_alloc(mm, pud, address);
+ if (!pmd)
+ return NULL;
+
+ pt_path->pmd = pmd;
+ return pte_alloc_map(mm, pmd, address);
+}
+
+static inline pte_t *lookup_page_table(struct mm_struct *mm,
+ unsigned long address, pt_path_t *pt_path)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ if (mm!=&init_mm) { /* Look up user page table */
+ pgd = pgd_offset(mm, address);
+ if (pgd_none_or_clear_bad(pgd))
+ return NULL;
+ } else { /* Look up kernel page table */
+ pgd = pgd_offset_k(address);
+ if (pgd_none_or_clear_bad(pgd))
+ return NULL;
+ }
+
+ pud = pud_offset(pgd, address);
+ if (pud_none_or_clear_bad(pud)) {
+ return NULL;
+ }
+
+ pmd = pmd_offset(pud, address);
+ if (pmd_none_or_clear_bad(pmd)) {
+ return NULL;
+ }
+
+ if(pt_path)
+ pt_path->pmd = pmd;
+
+ return pte_offset_map(pmd, address);
+}
+
+static inline pte_t *lookup_gate_area(struct mm_struct *mm,
+ unsigned long pg)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ if (pg > TASK_SIZE)
+ pgd = pgd_offset_k(pg);
+ else
+ pgd = pgd_offset_gate(mm, pg);
+ BUG_ON(pgd_none(*pgd));
+ pud = pud_offset(pgd, pg);
+ BUG_ON(pud_none(*pud));
+ pmd = pmd_offset(pud, pg);
+ if (pmd_none(*pmd))
+ return NULL;
+ pte = pte_offset_map(pmd, pg);
+ return pte;
+}
+
+static inline void coallesce_vmas(struct vm_area_struct **vma_p,
+ struct vm_area_struct **next_p)
+{
+ struct vm_area_struct *vma, *next;
+
+ vma = *vma_p;
+ next = *next_p;
+
+ /*
+ * Optimization: gather nearby vmas into one call down
+ */
+ while (next && next->vm_start <= vma->vm_end + PMD_SIZE) {
+ vma = next;
+ next = vma->vm_next;
+ anon_vma_unlink(vma);
+ unlink_file_vma(vma);
+ }
+
+ *vma_p = vma;
+ *next_p = next;
+}
+
+/*
+ * Locks the ptes notionally pointed to by the page table path.
+ */
+#define lock_pte(mm, pt_path) \
+ ({ spin_lock(pte_lockptr(mm, pt_path.pmd));})
+
+/*
+ * Unlocks the ptes notionally pointed to by the
+ * page table path.
+ */
+#define unlock_pte(mm, pt_path) \
+ ({ spin_unlock(pte_lockptr(mm, pt_path.pmd)); })
+
+/*
+ * Looks up a page table from a saved path. It also
+ * locks the page table.
+ */
+#define lookup_page_table_fast(mm, pt_path, address) \
+({ \
+ spinlock_t *__ptl = pte_lockptr(mm, pt_path.pmd); \
+ pte_t *__pte = pte_offset_map(pt_path.pmd, address); \
+ spin_lock(__ptl); \
+ __pte; \
+})
+
+/*
+ * Check that the original pte hasn't changed.
+ */
+#define atomic_pte_same(mm, pte, orig_pte, pt_path) \
+({ \
+ spinlock_t *ptl = pte_lockptr(mm, pt_path.pmd); \
+ int __same; \
+ spin_lock(ptl); \
+ __same = pte_same(*pte, orig_pte); \
+ spin_unlock(ptl); \
+ __same; \
+})
+
+#define CLUSTER_SIZE min(32*PAGE_SIZE, PMD_SIZE)
+#define CLUSTER_MASK (~(CLUSTER_SIZE - 1))
+
+#endif
* [PATCH 2/18] PTI - Page table type
From: Paul Davies @ 2006-07-13 4:26 UTC (permalink / raw)
To: linux-mm; +Cc: Paul Davies
This patch does the following:
1) Introduces a page table type in include/linux/pt-type.h.
2) VM code making direct reference to pgds is replaced with references
to the new page table type.
* pgd is replaced in sched.h with new page table type, pt_type_t.
* fork.c calls implementation in pt-default.h and no longer
directly refers to pgds.
* pgtable.h & mmu_context.h references to pgd are removed for i386 and ia64.
* init_task.h reference to pgd removed.
Signed-Off-By: Paul Davies <pauld@gelato.unsw.edu.au>
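[Not part of the patch] The practical effect on users of mm->pgd, as a minimal sketch (the helper name is hypothetical): the root of the page table is now reached through the accessor rather than the raw pgd pointer.
/* Illustration only: arch code that used __pa(mm->pgd) now goes through
 * the pt_type_t accessor instead. */
static inline unsigned long example_root_phys(struct mm_struct *mm)
{
	return __pa(get_root_pt(mm));	/* was __pa(mm->pgd) before this patch */
}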
---
fs/proc/task_mmu.c | 10 +---------
include/asm-ia64/mmu_context.h | 2 +-
include/asm-ia64/pgtable.h | 4 ++--
include/linux/init_task.h | 2 +-
include/linux/mm.h | 9 +++++++++
include/linux/pt-type.h | 9 +++++++++
include/linux/sched.h | 3 ++-
include/linux/swapops.h | 5 +++++
kernel/fork.c | 24 ++++++------------------
mm/memory.c | 10 +---------
10 files changed, 37 insertions(+), 41 deletions(-)
Index: linux-2.6.17.2/include/linux/mm.h
===================================================================
--- linux-2.6.17.2.orig/include/linux/mm.h 2006-07-08 23:23:46.298145512 +1000
+++ linux-2.6.17.2/include/linux/mm.h 2006-07-08 23:25:15.198630584 +1000
@@ -789,6 +789,15 @@
extern struct shrinker *set_shrinker(int, shrinker_t);
extern void remove_shrinker(struct shrinker *shrinker);
+struct mem_size_stats
+{
+ unsigned long resident;
+ unsigned long shared_clean;
+ unsigned long shared_dirty;
+ unsigned long private_clean;
+ unsigned long private_dirty;
+};
+
extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl));
int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
Index: linux-2.6.17.2/mm/memory.c
===================================================================
--- linux-2.6.17.2.orig/mm/memory.c 2006-07-08 23:23:46.299145360 +1000
+++ linux-2.6.17.2/mm/memory.c 2006-07-08 23:25:15.200630280 +1000
@@ -48,8 +48,8 @@
#include <linux/rmap.h>
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/pt.h>
-#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
@@ -333,14 +333,6 @@
return 0;
}
-static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
-{
- if (file_rss)
- add_mm_counter(mm, file_rss, file_rss);
- if (anon_rss)
- add_mm_counter(mm, anon_rss, anon_rss);
-}
-
/*
* This function is called to print an error when a bad pte
* is found. For example, we might have a PFN-mapped pte in
Index: linux-2.6.17.2/fs/proc/task_mmu.c
===================================================================
--- linux-2.6.17.2.orig/fs/proc/task_mmu.c 2006-07-08 23:23:46.299145360 +1000
+++ linux-2.6.17.2/fs/proc/task_mmu.c 2006-07-08 23:25:15.201630128 +1000
@@ -5,6 +5,7 @@
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/mempolicy.h>
+#include <linux/pt.h>
#include <asm/elf.h>
#include <asm/uaccess.h>
@@ -109,15 +110,6 @@
seq_printf(m, "%*c", len, ' ');
}
-struct mem_size_stats
-{
- unsigned long resident;
- unsigned long shared_clean;
- unsigned long shared_dirty;
- unsigned long private_clean;
- unsigned long private_dirty;
-};
-
static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
{
struct task_struct *task = m->private;
Index: linux-2.6.17.2/include/linux/swapops.h
===================================================================
--- linux-2.6.17.2.orig/include/linux/swapops.h 2006-07-08 23:23:46.298145512 +1000
+++ linux-2.6.17.2/include/linux/swapops.h 2006-07-08 23:25:15.201630128 +1000
@@ -1,3 +1,6 @@
+#ifndef _LINUX_SWAPOPS_H
+#define _LINUX_SWAPOPS_H 1
+
/*
* swapcache pages are stored in the swapper_space radix tree. We want to
* get good packing density in that tree, so the index should be dense in
@@ -67,3 +70,5 @@
BUG_ON(pte_file(__swp_entry_to_pte(arch_entry)));
return __swp_entry_to_pte(arch_entry);
}
+
+#endif
Index: linux-2.6.17.2/include/linux/pt-type.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.17.2/include/linux/pt-type.h 2006-07-08 23:25:15.202629976 +1000
@@ -0,0 +1,9 @@
+#ifndef _LINUX_PT_TYPE_H
+#define _LINUX_PT_TYPE_H
+
+typedef struct struct_pt_type { pgd_t *pgd; } pt_type_t;
+
+#define get_root_pt(mm) (mm->pt.pgd)
+#define set_root_pt .pt.pgd = swapper_pg_dir
+
+#endif
Index: linux-2.6.17.2/include/linux/sched.h
===================================================================
--- linux-2.6.17.2.orig/include/linux/sched.h 2006-07-08 23:23:46.298145512 +1000
+++ linux-2.6.17.2/include/linux/sched.h 2006-07-08 23:25:15.203629824 +1000
@@ -23,6 +23,7 @@
#include <asm/mmu.h>
#include <asm/cputime.h>
+#include <linux/pt-type.h>
#include <linux/smp.h>
#include <linux/sem.h>
#include <linux/signal.h>
@@ -304,7 +305,7 @@
unsigned long task_size; /* size of task vm space */
unsigned long cached_hole_size; /* if non-zero, the largest hole below free_area_cache */
unsigned long free_area_cache; /* first hole of size cached_hole_size or larger */
- pgd_t * pgd;
+ pt_type_t pt; /* Page table */
atomic_t mm_users; /* How many users with user space? */
atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */
int map_count; /* number of VMAs */
Index: linux-2.6.17.2/include/asm-ia64/pgtable.h
===================================================================
--- linux-2.6.17.2.orig/include/asm-ia64/pgtable.h 2006-07-08 23:23:46.298145512 +1000
+++ linux-2.6.17.2/include/asm-ia64/pgtable.h 2006-07-08 23:25:15.203629824 +1000
@@ -347,13 +347,13 @@
static inline pgd_t*
pgd_offset (struct mm_struct *mm, unsigned long address)
{
- return mm->pgd + pgd_index(address);
+ return mm->pt.pgd + pgd_index(address);
}
/* In the kernel's mapped region we completely ignore the region number
(since we know it's in region number 5). */
#define pgd_offset_k(addr) \
- (init_mm.pgd + (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)))
+ (init_mm.pt.pgd + (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)))
/* Look up a pgd entry in the gate area. On IA-64, the gate-area
resides in the kernel-mapped segment, hence we use pgd_offset_k()
Index: linux-2.6.17.2/include/asm-ia64/mmu_context.h
===================================================================
--- linux-2.6.17.2.orig/include/asm-ia64/mmu_context.h 2006-07-08 23:23:46.299145360 +1000
+++ linux-2.6.17.2/include/asm-ia64/mmu_context.h 2006-07-08 23:25:15.204629672 +1000
@@ -191,7 +191,7 @@
* We may get interrupts here, but that's OK because interrupt
* handlers cannot touch user-space.
*/
- ia64_set_kr(IA64_KR_PT_BASE, __pa(next->pgd));
+ ia64_set_kr(IA64_KR_PT_BASE, __pa(get_root_pt(next)));
activate_context(next);
}
Index: linux-2.6.17.2/kernel/fork.c
===================================================================
--- linux-2.6.17.2.orig/kernel/fork.c 2006-07-08 23:23:46.299145360 +1000
+++ linux-2.6.17.2/kernel/fork.c 2006-07-08 23:25:51.298142624 +1000
@@ -44,9 +44,9 @@
#include <linux/rmap.h>
#include <linux/acct.h>
#include <linux/cn_proc.h>
+#include <linux/pt.h>
#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
@@ -286,22 +286,10 @@
goto out;
}
-static inline int mm_alloc_pgd(struct mm_struct * mm)
-{
- mm->pgd = pgd_alloc(mm);
- if (unlikely(!mm->pgd))
- return -ENOMEM;
- return 0;
-}
-
-static inline void mm_free_pgd(struct mm_struct * mm)
-{
- pgd_free(mm->pgd);
-}
#else
#define dup_mmap(mm, oldmm) (0)
-#define mm_alloc_pgd(mm) (0)
-#define mm_free_pgd(mm)
+#define create_user_page_table(mm) (0)
+#define destroy_user_page_table(mm)
#endif /* CONFIG_MMU */
__cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
@@ -327,7 +315,7 @@
mm->free_area_cache = TASK_UNMAPPED_BASE;
mm->cached_hole_size = ~0UL;
- if (likely(!mm_alloc_pgd(mm))) {
+ if (likely(!create_user_page_table(mm))) {
mm->def_flags = 0;
return mm;
}
@@ -358,7 +346,7 @@
void fastcall __mmdrop(struct mm_struct *mm)
{
BUG_ON(mm == &init_mm);
- mm_free_pgd(mm);
+ destroy_user_page_table(mm);
destroy_context(mm);
free_mm(mm);
}
@@ -490,7 +478,7 @@
* If init_new_context() failed, we cannot use mmput() to free the mm
* because it calls destroy_context()
*/
- mm_free_pgd(mm);
+ destroy_user_page_table(mm);
free_mm(mm);
return NULL;
}
Index: linux-2.6.17.2/include/linux/init_task.h
===================================================================
--- linux-2.6.17.2.orig/include/linux/init_task.h 2006-07-08 23:23:46.298145512 +1000
+++ linux-2.6.17.2/include/linux/init_task.h 2006-07-08 23:25:15.205629520 +1000
@@ -44,7 +44,7 @@
#define INIT_MM(name) \
{ \
.mm_rb = RB_ROOT, \
- .pgd = swapper_pg_dir, \
+ set_root_pt, \
.mm_users = ATOMIC_INIT(2), \
.mm_count = ATOMIC_INIT(1), \
.mmap_sem = __RWSEM_INITIALIZER(name.mmap_sem), \
* [PATCH 3/18] PTI - Abstract default page table
From: Paul Davies @ 2006-07-13 4:27 UTC (permalink / raw)
To: linux-mm; +Cc: Paul Davies
This patch does the following:
1) Starts abstraction of page table implementation from memory.c to
pt-default.c
* Add mm/pt-default.c to contain the majority of the page table implementation
for the Linux default page table.
* Add pt-default.c to mm/Makefile
* Move page table allocation functions from memory.c to pt-default.c
2) Carried over from previous patch
* pgtable.h & mmu_context.h references to pgd are removed for i386.
* init_task.h reference to pgd removed.
Signed-Off-By: Paul Davies <pauld@gelato.unsw.edu.au>
---
arch/i386/mm/fault.c | 2
include/asm-i386/mmu_context.h | 5 -
include/asm-i386/pgtable.h | 2
mm/Makefile | 2
mm/memory.c | 87 ---------------------------------
mm/pt-default.c | 105 +++++++++++++++++++++++++++++++++++++++++
6 files changed, 110 insertions(+), 93 deletions(-)
Index: linux-2.6.17.2/mm/Makefile
===================================================================
--- linux-2.6.17.2.orig/mm/Makefile 2006-07-07 21:31:11.155866904 +1000
+++ linux-2.6.17.2/mm/Makefile 2006-07-07 21:31:13.847457720 +1000
@@ -5,7 +5,7 @@
mmu-y := nommu.o
mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
- vmalloc.o
+ vmalloc.o pt-default.o
obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
page_alloc.o page-writeback.o pdflush.o \
Index: linux-2.6.17.2/mm/pt-default.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.17.2/mm/pt-default.c 2006-07-07 22:06:38.839684032 +1000
@@ -0,0 +1,105 @@
+#include <linux/kernel_stat.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/mman.h>
+#include <linux/swap.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/rmap.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pt.h>
+
+#include <asm/uaccess.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/pgtable.h>
+
+#include <linux/swapops.h>
+#include <linux/elf.h>
+
+/*
+ * If a p?d_bad entry is found while walking page tables, report
+ * the error, before resetting entry to p?d_none. Usually (but
+ * very seldom) called out from the p?d_none_or_clear_bad macros.
+ */
+
+void pgd_clear_bad(pgd_t *pgd)
+{
+ pgd_ERROR(*pgd);
+ pgd_clear(pgd);
+}
+
+void pud_clear_bad(pud_t *pud)
+{
+ pud_ERROR(*pud);
+ pud_clear(pud);
+}
+
+void pmd_clear_bad(pmd_t *pmd)
+{
+ pmd_ERROR(*pmd);
+ pmd_clear(pmd);
+}
+
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+{
+ struct page *new = pte_alloc_one(mm, address);
+ if (!new)
+ return -ENOMEM;
+
+ pte_lock_init(new);
+ spin_lock(&mm->page_table_lock);
+ if (pmd_present(*pmd)) { /* Another has populated it */
+ pte_lock_deinit(new);
+ pte_free(new);
+ } else {
+ mm->nr_ptes++;
+ inc_page_state(nr_page_table_pages);
+ pmd_populate(mm, pmd, new);
+ }
+ spin_unlock(&mm->page_table_lock);
+ return 0;
+}
+
+int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
+{
+ pte_t *new = pte_alloc_one_kernel(&init_mm, address);
+ if (!new)
+ return -ENOMEM;
+
+ spin_lock(&init_mm.page_table_lock);
+ if (pmd_present(*pmd)) /* Another has populated it */
+ pte_free_kernel(new);
+ else
+ pmd_populate_kernel(&init_mm, pmd, new);
+ spin_unlock(&init_mm.page_table_lock);
+ return 0;
+}
+
+#ifndef __PAGETABLE_PUD_FOLDED
+/*
+ * Allocate page upper directory.
+ * We've already handled the fast-path in-line.
+ */
+int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+{
+ pud_t *new = pud_alloc_one(mm, address);
+ if (!new)
+ return -ENOMEM;
+
+ spin_lock(&mm->page_table_lock);
+ if (pgd_present(*pgd)) /* Another has populated it */
+ pud_free(new);
+ else
+ pgd_populate(mm, pgd, new);
+ spin_unlock(&mm->page_table_lock);
+ return 0;
+}
+#else
+/* Workaround for gcc 2.96 */
+int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+{
+ return 0;
+}
+#endif /* __PAGETABLE_PUD_FOLDED */
Index: linux-2.6.17.2/arch/i386/mm/fault.c
===================================================================
--- linux-2.6.17.2.orig/arch/i386/mm/fault.c 2006-07-07 21:31:11.168864928 +1000
+++ linux-2.6.17.2/arch/i386/mm/fault.c 2006-07-07 21:31:13.848457568 +1000
@@ -222,7 +222,7 @@
pmd_t *pmd, *pmd_k;
pgd += index;
- pgd_k = init_mm.pgd + index;
+ pgd_k = init_mm.pt.pgd + index;
if (!pgd_present(*pgd_k))
return NULL;
Index: linux-2.6.17.2/include/asm-i386/mmu_context.h
===================================================================
--- linux-2.6.17.2.orig/include/asm-i386/mmu_context.h 2006-07-07 21:31:11.168864928 +1000
+++ linux-2.6.17.2/include/asm-i386/mmu_context.h 2006-07-07 21:31:13.849457416 +1000
@@ -39,8 +39,7 @@
cpu_set(cpu, next->cpu_vm_mask);
/* Re-load page tables */
- load_cr3(next->pgd);
-
+ load_cr3(get_root_pt(next));
/*
* load the LDT, if the LDT is different:
*/
@@ -56,7 +55,7 @@
/* We were in lazy tlb mode and leave_mm disabled
* tlb flush IPI delivery. We must reload %cr3.
*/
- load_cr3(next->pgd);
+ load_cr3(get_root_pt(next));
load_LDT_nolock(&next->context, cpu);
}
}
Index: linux-2.6.17.2/include/asm-i386/pgtable.h
===================================================================
--- linux-2.6.17.2.orig/include/asm-i386/pgtable.h 2006-07-07 21:31:11.167865080 +1000
+++ linux-2.6.17.2/include/asm-i386/pgtable.h 2006-07-07 21:31:13.850457264 +1000
@@ -339,7 +339,7 @@
* pgd_offset() returns a (pgd_t *)
* pgd_index() is used get the offset into the pgd page's array of pgd_t's;
*/
-#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))
+#define pgd_offset(mm, address) ((mm)->pt.pgd+pgd_index(address))
/*
* a shortcut which implies the use of the kernel's pgd, instead
Index: linux-2.6.17.2/mm/memory.c
===================================================================
--- linux-2.6.17.2.orig/mm/memory.c 2006-07-07 21:31:13.820461824 +1000
+++ linux-2.6.17.2/mm/memory.c 2006-07-07 22:06:48.655191848 +1000
@@ -91,31 +91,6 @@
}
__setup("norandmaps", disable_randmaps);
-
-/*
- * If a p?d_bad entry is found while walking page tables, report
- * the error, before resetting entry to p?d_none. Usually (but
- * very seldom) called out from the p?d_none_or_clear_bad macros.
- */
-
-void pgd_clear_bad(pgd_t *pgd)
-{
- pgd_ERROR(*pgd);
- pgd_clear(pgd);
-}
-
-void pud_clear_bad(pud_t *pud)
-{
- pud_ERROR(*pud);
- pud_clear(pud);
-}
-
-void pmd_clear_bad(pmd_t *pmd)
-{
- pmd_ERROR(*pmd);
- pmd_clear(pmd);
-}
-
/*
* Note: this doesn't free the actual pages themselves. That
* has been handled earlier when unmapping all the memory regions.
@@ -298,41 +273,6 @@
}
}
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
-{
- struct page *new = pte_alloc_one(mm, address);
- if (!new)
- return -ENOMEM;
-
- pte_lock_init(new);
- spin_lock(&mm->page_table_lock);
- if (pmd_present(*pmd)) { /* Another has populated it */
- pte_lock_deinit(new);
- pte_free(new);
- } else {
- mm->nr_ptes++;
- inc_page_state(nr_page_table_pages);
- pmd_populate(mm, pmd, new);
- }
- spin_unlock(&mm->page_table_lock);
- return 0;
-}
-
-int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
-{
- pte_t *new = pte_alloc_one_kernel(&init_mm, address);
- if (!new)
- return -ENOMEM;
-
- spin_lock(&init_mm.page_table_lock);
- if (pmd_present(*pmd)) /* Another has populated it */
- pte_free_kernel(new);
- else
- pmd_populate_kernel(&init_mm, pmd, new);
- spin_unlock(&init_mm.page_table_lock);
- return 0;
-}
-
/*
* This function is called to print an error when a bad pte
* is found. For example, we might have a PFN-mapped pte in
@@ -2276,33 +2216,6 @@
EXPORT_SYMBOL_GPL(__handle_mm_fault);
-#ifndef __PAGETABLE_PUD_FOLDED
-/*
- * Allocate page upper directory.
- * We've already handled the fast-path in-line.
- */
-int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
-{
- pud_t *new = pud_alloc_one(mm, address);
- if (!new)
- return -ENOMEM;
-
- spin_lock(&mm->page_table_lock);
- if (pgd_present(*pgd)) /* Another has populated it */
- pud_free(new);
- else
- pgd_populate(mm, pgd, new);
- spin_unlock(&mm->page_table_lock);
- return 0;
-}
-#else
-/* Workaround for gcc 2.96 */
-int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
-{
- return 0;
-}
-#endif /* __PAGETABLE_PUD_FOLDED */
-
#ifndef __PAGETABLE_PMD_FOLDED
/*
* Allocate page middle directory.
* [PATCH 4/18] PTI - Abstract default page table
From: Paul Davies @ 2006-07-13 4:27 UTC (permalink / raw)
To: linux-mm; +Cc: Paul Davies
This patch does the following:
1) Continues page table abstraction from memory.c to pt-default.c
* More allocation functions moved across.
* Page table deallocation iterator put into pt-default.c
* Removed free_pgd_range prototype from mm.h
2) Calls coallesce_vmas() in free_pgtables() to remove the direct reference
to PMD_SIZE.
Signed-Off-By: Paul Davies <pauld@gelato.unsw.edu.au>
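[Not part of the patch] For orientation, a paraphrase of the loop shape free_pgtables() takes after this change (hugetlb handling omitted; not the literal patched code):
/* Illustration only: unlink each vma, gather its neighbours, then free
 * the covering page table range in one call down. */
static void example_free_pgtables(struct mmu_gather **tlb,
		struct vm_area_struct *vma,
		unsigned long floor, unsigned long ceiling)
{
	while (vma) {
		struct vm_area_struct *next = vma->vm_next;
		unsigned long addr = vma->vm_start;

		anon_vma_unlink(vma);
		unlink_file_vma(vma);

		/* gather neighbouring vmas so one call down covers the range */
		coallesce_vmas(&vma, &next);

		free_page_table_range(tlb, addr, vma->vm_end,
				floor, next ? next->vm_start : ceiling);
		vma = next;
	}
}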
---
include/linux/mm.h | 2
mm/memory.c | 53 ---------------
mm/pt-default.c | 182 +++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 185 insertions(+), 52 deletions(-)
Index: linux-2.6.17.2/mm/memory.c
===================================================================
--- linux-2.6.17.2.orig/mm/memory.c 2006-07-09 00:06:01.159110960 +1000
+++ linux-2.6.17.2/mm/memory.c 2006-07-09 00:06:01.707027664 +1000
@@ -252,23 +252,10 @@
anon_vma_unlink(vma);
unlink_file_vma(vma);
- if (is_vm_hugetlb_page(vma)) {
- hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
- floor, next? next->vm_start: ceiling);
- } else {
- /*
- * Optimization: gather nearby vmas into one call down
- */
- while (next && next->vm_start <= vma->vm_end + PMD_SIZE
- && !is_vm_hugetlb_page(next)) {
- vma = next;
- next = vma->vm_next;
- anon_vma_unlink(vma);
- unlink_file_vma(vma);
- }
- free_pgd_range(tlb, addr, vma->vm_end,
+ coallesce_vmas(&vma, &next);
+
+ free_page_table_range(tlb, addr, vma->vm_end,
floor, next? next->vm_start: ceiling);
- }
vma = next;
}
}
@@ -2216,40 +2203,6 @@
EXPORT_SYMBOL_GPL(__handle_mm_fault);
-#ifndef __PAGETABLE_PMD_FOLDED
-/*
- * Allocate page middle directory.
- * We've already handled the fast-path in-line.
- */
-int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
-{
- pmd_t *new = pmd_alloc_one(mm, address);
- if (!new)
- return -ENOMEM;
-
- spin_lock(&mm->page_table_lock);
-#ifndef __ARCH_HAS_4LEVEL_HACK
- if (pud_present(*pud)) /* Another has populated it */
- pmd_free(new);
- else
- pud_populate(mm, pud, new);
-#else
- if (pgd_present(*pud)) /* Another has populated it */
- pmd_free(new);
- else
- pgd_populate(mm, pud, new);
-#endif /* __ARCH_HAS_4LEVEL_HACK */
- spin_unlock(&mm->page_table_lock);
- return 0;
-}
-#else
-/* Workaround for gcc 2.96 */
-int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
-{
- return 0;
-}
-#endif /* __PAGETABLE_PMD_FOLDED */
-
int make_pages_present(unsigned long addr, unsigned long end)
{
int ret, len, write;
Index: linux-2.6.17.2/mm/pt-default.c
===================================================================
--- linux-2.6.17.2.orig/mm/pt-default.c 2006-07-09 00:06:01.149112480 +1000
+++ linux-2.6.17.2/mm/pt-default.c 2006-07-09 00:06:01.707027664 +1000
@@ -42,6 +42,154 @@
pmd_clear(pmd);
}
+/*
+ * Note: this doesn't free the actual pages themselves. That
+ * has been handled earlier when unmapping all the memory regions.
+ */
+static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
+{
+ struct page *page = pmd_page(*pmd);
+ pmd_clear(pmd);
+ pte_lock_deinit(page);
+ pte_free_tlb(tlb, page);
+ dec_page_state(nr_page_table_pages);
+ tlb->mm->nr_ptes--;
+}
+
+static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+ unsigned long addr, unsigned long end,
+ unsigned long floor, unsigned long ceiling)
+{
+ pmd_t *pmd;
+ unsigned long next;
+ unsigned long start;
+
+ start = addr;
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
+ free_pte_range(tlb, pmd);
+ } while (pmd++, addr = next, addr != end);
+
+ start &= PUD_MASK;
+ if (start < floor)
+ return;
+ if (ceiling) {
+ ceiling &= PUD_MASK;
+ if (!ceiling)
+ return;
+ }
+ if (end - 1 > ceiling - 1)
+ return;
+
+ pmd = pmd_offset(pud, start);
+ pud_clear(pud);
+ pmd_free_tlb(tlb, pmd);
+}
+
+static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+ unsigned long addr, unsigned long end,
+ unsigned long floor, unsigned long ceiling)
+{
+ pud_t *pud;
+ unsigned long next;
+ unsigned long start;
+
+ start = addr;
+ pud = pud_offset(pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
+ free_pmd_range(tlb, pud, addr, next, floor, ceiling);
+ } while (pud++, addr = next, addr != end);
+
+ start &= PGDIR_MASK;
+ if (start < floor)
+ return;
+ if (ceiling) {
+ ceiling &= PGDIR_MASK;
+ if (!ceiling)
+ return;
+ }
+ if (end - 1 > ceiling - 1)
+ return;
+
+ pud = pud_offset(pgd, start);
+ pgd_clear(pgd);
+ pud_free_tlb(tlb, pud);
+}
+
+/*
+ * This function frees user-level page tables of a process.
+ *
+ * Must be called with pagetable lock held.
+ */
+void free_page_table_range(struct mmu_gather **tlb,
+ unsigned long addr, unsigned long end,
+ unsigned long floor, unsigned long ceiling)
+{
+ pgd_t *pgd;
+ unsigned long next;
+ unsigned long start;
+
+ /*
+ * The next few lines have given us lots of grief...
+ *
+ * Why are we testing PMD* at this top level? Because often
+ * there will be no work to do at all, and we'd prefer not to
+ * go all the way down to the bottom just to discover that.
+ *
+ * Why all these "- 1"s? Because 0 represents both the bottom
+ * of the address space and the top of it (using -1 for the
+ * top wouldn't help much: the masks would do the wrong thing).
+ * The rule is that addr 0 and floor 0 refer to the bottom of
+ * the address space, but end 0 and ceiling 0 refer to the top
+ * Comparisons need to use "end - 1" and "ceiling - 1" (though
+ * that end 0 case should be mythical).
+ *
+ * Wherever addr is brought up or ceiling brought down, we must
+ * be careful to reject "the opposite 0" before it confuses the
+ * subsequent tests. But what about where end is brought down
+ * by PMD_SIZE below? no, end can't go down to 0 there.
+ *
+ * Whereas we round start (addr) and ceiling down, by different
+ * masks at different levels, in order to test whether a table
+ * now has no other vmas using it, so can be freed, we don't
+ * bother to round floor or end up - the tests don't need that.
+ */
+
+ addr &= PMD_MASK;
+ if (addr < floor) {
+ addr += PMD_SIZE;
+ if (!addr)
+ return;
+ }
+ if (ceiling) {
+ ceiling &= PMD_MASK;
+ if (!ceiling)
+ return;
+ }
+ if (end - 1 > ceiling - 1)
+ end -= PMD_SIZE;
+ if (addr > end - 1)
+ return;
+
+ start = addr;
+ pgd = pgd_offset((*tlb)->mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
+ free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
+ } while (pgd++, addr = next, addr != end);
+
+ if (!(*tlb)->fullmm)
+ flush_tlb_pgtables((*tlb)->mm, start, end);
+}
+
int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
{
struct page *new = pte_alloc_one(mm, address);
@@ -103,3 +251,37 @@
return 0;
}
#endif /* __PAGETABLE_PUD_FOLDED */
+
+#ifndef __PAGETABLE_PMD_FOLDED
+/*
+ * Allocate page middle directory.
+ * We've already handled the fast-path in-line.
+ */
+int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
+{
+ pmd_t *new = pmd_alloc_one(mm, address);
+ if (!new)
+ return -ENOMEM;
+
+ spin_lock(&mm->page_table_lock);
+#ifndef __ARCH_HAS_4LEVEL_HACK
+ if (pud_present(*pud)) /* Another has populated it */
+ pmd_free(new);
+ else
+ pud_populate(mm, pud, new);
+#else
+ if (pgd_present(*pud)) /* Another has populated it */
+ pmd_free(new);
+ else
+ pgd_populate(mm, pud, new);
+#endif /* __ARCH_HAS_4LEVEL_HACK */
+ spin_unlock(&mm->page_table_lock);
+ return 0;
+}
+#else
+/* Workaround for gcc 2.96 */
+int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
+{
+ return 0;
+}
+#endif /* __PAGETABLE_PMD_FOLDED */
Index: linux-2.6.17.2/include/linux/mm.h
===================================================================
--- linux-2.6.17.2.orig/include/linux/mm.h 2006-07-09 00:06:00.654187720 +1000
+++ linux-2.6.17.2/include/linux/mm.h 2006-07-09 00:06:01.714026600 +1000
@@ -702,8 +702,6 @@
struct vm_area_struct *start_vma, unsigned long start_addr,
unsigned long end_addr, unsigned long *nr_accounted,
struct zap_details *);
-void free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
- unsigned long end, unsigned long floor, unsigned long ceiling);
void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
unsigned long floor, unsigned long ceiling);
int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
* [PATCH 5/18] PTI - Abstract default page table
From: Paul Davies @ 2006-07-13 4:27 UTC (permalink / raw)
To: linux-mm; +Cc: Paul Davies
This patch does the following:
1) Continues page table abstraction from memory.c to pt-default.c
* Page table deallocation iterator removed from memory.c
2) Abstraction of page table implementation in mm.h to pt-mm.h
* Puts the implementation from mm.h into pt-mm.h
Signed-Off-By: Paul Davies <pauld@gelato.unsw.edu.au>
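[Not part of the patch] A minimal sketch of the locked-pte pattern the pt-mm.h macros carry over (hypothetical helper; pte_alloc_map_lock() allocates the pte page if it is missing and returns the pte mapped and locked):
/* Illustration only: allocate/map/lock a pte, install a value if the
 * slot is still empty, then unlock and unmap. */
static int example_set_pte(struct mm_struct *mm, pmd_t *pmd,
		unsigned long addr, pte_t newval)
{
	spinlock_t *ptl;
	pte_t *pte;

	pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
	if (!pte)
		return -ENOMEM;
	if (pte_none(*pte))
		set_pte_at(mm, addr, pte, newval);
	pte_unmap_unlock(pte, ptl);
	return 0;
}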
---
include/linux/pt-mm.h | 118 +++++++++++++++++++++++++++++++++++++++
mm/memory.c | 148 --------------------------------------------------
2 files changed, 118 insertions(+), 148 deletions(-)
Index: linux-2.6.17.2/include/linux/pt-mm.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.17.2/include/linux/pt-mm.h 2006-07-08 23:56:38.660308704 +1000
@@ -0,0 +1,118 @@
+#ifndef _LINUX_PT_MM_H
+#define _LINUX_PT_MM_H 1
+
+int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
+int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
+int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
+
+/*
+ * The following ifdef needed to get the 4level-fixup.h header to work.
+ * Remove it when 4level-fixup.h has been removed.
+ */
+#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)
+static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+{
+ return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))?
+ NULL: pud_offset(pgd, address);
+}
+
+static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
+{
+ return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
+ NULL: pmd_offset(pud, address);
+}
+#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
+
+static inline pmd_t *lookup_pmd(struct mm_struct *mm, unsigned long addr)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pgd = pgd_offset(mm, addr);
+ if (pgd_none_or_clear_bad(pgd))
+ return NULL;
+
+ pud = pud_offset(pgd, addr);
+ if (pud_none_or_clear_bad(pud))
+ return NULL;
+
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none_or_clear_bad(pmd))
+ return NULL;
+
+ return pmd;
+}
+
+static inline pmd_t *build_pmd(struct mm_struct *mm, unsigned long addr)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd=NULL;
+
+ pgd = pgd_offset(mm, addr);
+ pud = pud_alloc(mm, pgd, addr);
+ if (!pud)
+ return NULL;
+
+ pmd = pmd_alloc(mm, pud, addr);
+ if (!pmd)
+ return NULL;
+
+ if (!pmd_present(*pmd) && __pte_alloc(mm, pmd, addr))
+ return NULL;
+
+ return pmd;
+}
+
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+/*
+ * We tuck a spinlock to guard each pagetable page into its struct page,
+ * at page->private, with BUILD_BUG_ON to make sure that this will not
+ * overflow into the next struct page (as it might with DEBUG_SPINLOCK).
+ * When freeing, reset page->mapping so free_pages_check won't complain.
+ */
+#define __pte_lockptr(page) &((page)->ptl)
+#define pte_lock_init(_page) do { \
+ spin_lock_init(__pte_lockptr(_page)); \
+} while (0)
+#define pte_lock_deinit(page) ((page)->mapping = NULL)
+#define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
+#else
+/*
+ * We use mm->page_table_lock to guard all pagetable pages of the mm.
+ */
+#define pte_lock_init(page) do {} while (0)
+#define pte_lock_deinit(page) do {} while (0)
+#define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;})
+#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+
+#define pte_offset_map_lock(mm, pmd, address, ptlp) \
+({ \
+ spinlock_t *__ptl = pte_lockptr(mm, pmd); \
+ pte_t *__pte = pte_offset_map(pmd, address); \
+ *(ptlp) = __ptl; \
+ spin_lock(__ptl); \
+ __pte; \
+})
+
+#define pte_unmap_unlock(pte, ptl) do { \
+ spin_unlock(ptl); \
+ pte_unmap(pte); \
+} while (0)
+
+#define pte_alloc_map(mm, pmd, address) \
+ ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
+ NULL: pte_offset_map(pmd, address))
+
+#define pte_alloc_map_lock(mm, pmd, address, ptlp) \
+ ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
+ NULL: pte_offset_map_lock(mm, pmd, address, ptlp))
+
+#define pte_alloc_kernel(pmd, address) \
+ ((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+ NULL: pte_offset_kernel(pmd, address))
+
+
+#endif
Index: linux-2.6.17.2/mm/memory.c
===================================================================
--- linux-2.6.17.2.orig/mm/memory.c 2006-07-08 23:56:33.707061712 +1000
+++ linux-2.6.17.2/mm/memory.c 2006-07-08 23:56:57.978371912 +1000
@@ -91,154 +91,6 @@
}
__setup("norandmaps", disable_randmaps);
-/*
- * Note: this doesn't free the actual pages themselves. That
- * has been handled earlier when unmapping all the memory regions.
- */
-static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
-{
- struct page *page = pmd_page(*pmd);
- pmd_clear(pmd);
- pte_lock_deinit(page);
- pte_free_tlb(tlb, page);
- dec_page_state(nr_page_table_pages);
- tlb->mm->nr_ptes--;
-}
-
-static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
- unsigned long addr, unsigned long end,
- unsigned long floor, unsigned long ceiling)
-{
- pmd_t *pmd;
- unsigned long next;
- unsigned long start;
-
- start = addr;
- pmd = pmd_offset(pud, addr);
- do {
- next = pmd_addr_end(addr, end);
- if (pmd_none_or_clear_bad(pmd))
- continue;
- free_pte_range(tlb, pmd);
- } while (pmd++, addr = next, addr != end);
-
- start &= PUD_MASK;
- if (start < floor)
- return;
- if (ceiling) {
- ceiling &= PUD_MASK;
- if (!ceiling)
- return;
- }
- if (end - 1 > ceiling - 1)
- return;
-
- pmd = pmd_offset(pud, start);
- pud_clear(pud);
- pmd_free_tlb(tlb, pmd);
-}
-
-static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
- unsigned long addr, unsigned long end,
- unsigned long floor, unsigned long ceiling)
-{
- pud_t *pud;
- unsigned long next;
- unsigned long start;
-
- start = addr;
- pud = pud_offset(pgd, addr);
- do {
- next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud))
- continue;
- free_pmd_range(tlb, pud, addr, next, floor, ceiling);
- } while (pud++, addr = next, addr != end);
-
- start &= PGDIR_MASK;
- if (start < floor)
- return;
- if (ceiling) {
- ceiling &= PGDIR_MASK;
- if (!ceiling)
- return;
- }
- if (end - 1 > ceiling - 1)
- return;
-
- pud = pud_offset(pgd, start);
- pgd_clear(pgd);
- pud_free_tlb(tlb, pud);
-}
-
-/*
- * This function frees user-level page tables of a process.
- *
- * Must be called with pagetable lock held.
- */
-void free_pgd_range(struct mmu_gather **tlb,
- unsigned long addr, unsigned long end,
- unsigned long floor, unsigned long ceiling)
-{
- pgd_t *pgd;
- unsigned long next;
- unsigned long start;
-
- /*
- * The next few lines have given us lots of grief...
- *
- * Why are we testing PMD* at this top level? Because often
- * there will be no work to do at all, and we'd prefer not to
- * go all the way down to the bottom just to discover that.
- *
- * Why all these "- 1"s? Because 0 represents both the bottom
- * of the address space and the top of it (using -1 for the
- * top wouldn't help much: the masks would do the wrong thing).
- * The rule is that addr 0 and floor 0 refer to the bottom of
- * the address space, but end 0 and ceiling 0 refer to the top
- * Comparisons need to use "end - 1" and "ceiling - 1" (though
- * that end 0 case should be mythical).
- *
- * Wherever addr is brought up or ceiling brought down, we must
- * be careful to reject "the opposite 0" before it confuses the
- * subsequent tests. But what about where end is brought down
- * by PMD_SIZE below? no, end can't go down to 0 there.
- *
- * Whereas we round start (addr) and ceiling down, by different
- * masks at different levels, in order to test whether a table
- * now has no other vmas using it, so can be freed, we don't
- * bother to round floor or end up - the tests don't need that.
- */
-
- addr &= PMD_MASK;
- if (addr < floor) {
- addr += PMD_SIZE;
- if (!addr)
- return;
- }
- if (ceiling) {
- ceiling &= PMD_MASK;
- if (!ceiling)
- return;
- }
- if (end - 1 > ceiling - 1)
- end -= PMD_SIZE;
- if (addr > end - 1)
- return;
-
- start = addr;
- pgd = pgd_offset((*tlb)->mm, addr);
- do {
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
- continue;
- free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
- } while (pgd++, addr = next, addr != end);
-
- if (!(*tlb)->fullmm)
- flush_tlb_pgtables((*tlb)->mm, start, end);
-}
-
void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
unsigned long floor, unsigned long ceiling)
{
* [PATCH 6/18] PTI - Abstract default page table
From: Paul Davies @ 2006-07-13 4:27 UTC (permalink / raw)
To: linux-mm; +Cc: Paul Davies
This patch does the following:
1) Abstraction of page table implementation in mm.h to pt-mm.h
* Removes implementation from mm.h
2) Abstraction of page table implementation from asm-generic/pgtable.h
to asm-generic/pt-pgtable.h
Signed-Off-By: Paul Davies <pauld@gelato.unsw.edu.au>
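[Not part of the patch] The helpers moved into asm-generic/pt-pgtable.h exist to support the canonical range walk; a minimal sketch (hypothetical function) of the top level of such a walk:
/* Illustration only: skip empty/bad pgd entries, clamp each step to the
 * next pgd boundary with pgd_addr_end(), and descend for the rest. */
static void example_walk_pgd_range(struct mm_struct *mm,
		unsigned long addr, unsigned long end)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	unsigned long next;

	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		/* ... walk puds/pmds/ptes covering [addr, next) ... */
	} while (pgd++, addr = next, addr != end);
}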
---
asm-generic/pgtable.h | 73 --------------------------------------------
asm-generic/pt-pgtable.h | 77 +++++++++++++++++++++++++++++++++++++++++++++++
linux/mm.h | 71 -------------------------------------------
3 files changed, 79 insertions(+), 142 deletions(-)
Index: linux-2.6.17.2/include/linux/mm.h
===================================================================
--- linux-2.6.17.2.orig/include/linux/mm.h 2006-07-07 23:44:23.659612400 +1000
+++ linux-2.6.17.2/include/linux/mm.h 2006-07-08 00:01:41.280244328 +1000
@@ -798,76 +798,7 @@
extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl));
-int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
-int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
-int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
-
-/*
- * The following ifdef needed to get the 4level-fixup.h header to work.
- * Remove it when 4level-fixup.h has been removed.
- */
-#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)
-static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
-{
- return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))?
- NULL: pud_offset(pgd, address);
-}
-
-static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
-{
- return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
- NULL: pmd_offset(pud, address);
-}
-#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
-
-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
-/*
- * We tuck a spinlock to guard each pagetable page into its struct page,
- * at page->private, with BUILD_BUG_ON to make sure that this will not
- * overflow into the next struct page (as it might with DEBUG_SPINLOCK).
- * When freeing, reset page->mapping so free_pages_check won't complain.
- */
-#define __pte_lockptr(page) &((page)->ptl)
-#define pte_lock_init(_page) do { \
- spin_lock_init(__pte_lockptr(_page)); \
-} while (0)
-#define pte_lock_deinit(page) ((page)->mapping = NULL)
-#define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
-#else
-/*
- * We use mm->page_table_lock to guard all pagetable pages of the mm.
- */
-#define pte_lock_init(page) do {} while (0)
-#define pte_lock_deinit(page) do {} while (0)
-#define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;})
-#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
-
-#define pte_offset_map_lock(mm, pmd, address, ptlp) \
-({ \
- spinlock_t *__ptl = pte_lockptr(mm, pmd); \
- pte_t *__pte = pte_offset_map(pmd, address); \
- *(ptlp) = __ptl; \
- spin_lock(__ptl); \
- __pte; \
-})
-
-#define pte_unmap_unlock(pte, ptl) do { \
- spin_unlock(ptl); \
- pte_unmap(pte); \
-} while (0)
-
-#define pte_alloc_map(mm, pmd, address) \
- ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
- NULL: pte_offset_map(pmd, address))
-
-#define pte_alloc_map_lock(mm, pmd, address, ptlp) \
- ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
- NULL: pte_offset_map_lock(mm, pmd, address, ptlp))
-
-#define pte_alloc_kernel(pmd, address) \
- ((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
- NULL: pte_offset_kernel(pmd, address))
+#include <linux/pt-mm.h>
extern void free_area_init(unsigned long * zones_size);
extern void free_area_init_node(int nid, pg_data_t *pgdat,
Index: linux-2.6.17.2/include/asm-generic/pt-pgtable.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.17.2/include/asm-generic/pt-pgtable.h 2006-07-08 00:14:10.398361064 +1000
@@ -0,0 +1,77 @@
+#ifndef _ASM_GENERIC_DEFAULT_PGTABLE_H
+#define _ASM_GENERIC_DEFAULT_PGTABLE_H 1
+
+#ifndef __HAVE_ARCH_PGD_OFFSET_GATE
+#define pgd_offset_gate(mm, addr) pgd_offset(mm, addr)
+#endif
+
+/*
+ * When walking page tables, get the address of the next boundary,
+ * or the end address of the range if that comes earlier. Although no
+ * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
+ */
+
+#define pgd_addr_end(addr, end) \
+({ unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \
+ (__boundary - 1 < (end) - 1)? __boundary: (end); \
+})
+
+#ifndef pud_addr_end
+#define pud_addr_end(addr, end) \
+({ unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK; \
+ (__boundary - 1 < (end) - 1)? __boundary: (end); \
+})
+#endif
+
+#ifndef pmd_addr_end
+#define pmd_addr_end(addr, end) \
+({ unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \
+ (__boundary - 1 < (end) - 1)? __boundary: (end); \
+})
+#endif
+
+#ifndef __ASSEMBLY__
+/*
+ * When walking page tables, we usually want to skip any p?d_none entries;
+ * and any p?d_bad entries - reporting the error before resetting to none.
+ * Do the tests inline, but report and clear the bad entry in mm/memory.c.
+ */
+void pgd_clear_bad(pgd_t *);
+void pud_clear_bad(pud_t *);
+void pmd_clear_bad(pmd_t *);
+
+static inline int pgd_none_or_clear_bad(pgd_t *pgd)
+{
+ if (pgd_none(*pgd))
+ return 1;
+ if (unlikely(pgd_bad(*pgd))) {
+ pgd_clear_bad(pgd);
+ return 1;
+ }
+ return 0;
+}
+
+static inline int pud_none_or_clear_bad(pud_t *pud)
+{
+ if (pud_none(*pud))
+ return 1;
+ if (unlikely(pud_bad(*pud))) {
+ pud_clear_bad(pud);
+ return 1;
+ }
+ return 0;
+}
+
+static inline int pmd_none_or_clear_bad(pmd_t *pmd)
+{
+ if (pmd_none(*pmd))
+ return 1;
+ if (unlikely(pmd_bad(*pmd))) {
+ pmd_clear_bad(pmd);
+ return 1;
+ }
+ return 0;
+}
+#endif /* !__ASSEMBLY__ */
+
+#endif
Index: linux-2.6.17.2/include/asm-generic/pgtable.h
===================================================================
--- linux-2.6.17.2.orig/include/asm-generic/pgtable.h 2006-06-30 10:17:23.000000000 +1000
+++ linux-2.6.17.2/include/asm-generic/pgtable.h 2006-07-08 00:19:21.834250720 +1000
@@ -151,10 +151,6 @@
#define page_test_and_clear_young(page) (0)
#endif
-#ifndef __HAVE_ARCH_PGD_OFFSET_GATE
-#define pgd_offset_gate(mm, addr) pgd_offset(mm, addr)
-#endif
-
#ifndef __HAVE_ARCH_LAZY_MMU_PROT_UPDATE
#define lazy_mmu_prot_update(pte) do { } while (0)
#endif
@@ -163,73 +159,6 @@
#define move_pte(pte, prot, old_addr, new_addr) (pte)
#endif
-/*
- * When walking page tables, get the address of the next boundary,
- * or the end address of the range if that comes earlier. Although no
- * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
- */
-
-#define pgd_addr_end(addr, end) \
-({ unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \
- (__boundary - 1 < (end) - 1)? __boundary: (end); \
-})
-
-#ifndef pud_addr_end
-#define pud_addr_end(addr, end) \
-({ unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK; \
- (__boundary - 1 < (end) - 1)? __boundary: (end); \
-})
-#endif
-
-#ifndef pmd_addr_end
-#define pmd_addr_end(addr, end) \
-({ unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \
- (__boundary - 1 < (end) - 1)? __boundary: (end); \
-})
-#endif
-
-#ifndef __ASSEMBLY__
-/*
- * When walking page tables, we usually want to skip any p?d_none entries;
- * and any p?d_bad entries - reporting the error before resetting to none.
- * Do the tests inline, but report and clear the bad entry in mm/memory.c.
- */
-void pgd_clear_bad(pgd_t *);
-void pud_clear_bad(pud_t *);
-void pmd_clear_bad(pmd_t *);
-
-static inline int pgd_none_or_clear_bad(pgd_t *pgd)
-{
- if (pgd_none(*pgd))
- return 1;
- if (unlikely(pgd_bad(*pgd))) {
- pgd_clear_bad(pgd);
- return 1;
- }
- return 0;
-}
-
-static inline int pud_none_or_clear_bad(pud_t *pud)
-{
- if (pud_none(*pud))
- return 1;
- if (unlikely(pud_bad(*pud))) {
- pud_clear_bad(pud);
- return 1;
- }
- return 0;
-}
-
-static inline int pmd_none_or_clear_bad(pmd_t *pmd)
-{
- if (pmd_none(*pmd))
- return 1;
- if (unlikely(pmd_bad(*pmd))) {
- pmd_clear_bad(pmd);
- return 1;
- }
- return 0;
-}
-#endif /* !__ASSEMBLY__ */
+#include <asm-generic/pt-pgtable.h>
#endif /* _ASM_GENERIC_PGTABLE_H */
* [PATCH 7/18] PTI - Page fault handler
From: Paul Davies @ 2006-07-13 4:27 UTC (permalink / raw)
To: linux-mm; +Cc: Paul Davies
This patch does the following:
1) Calls the PTI interface to abstract page-table-dependent calls
from the page fault handler functions.
2) Abstracts the page-table-dependent #defines from asm-generic/tlb.h
to asm-generic/pt-tlb.h
Signed-Off-By: Paul Davies <pauld@gelato.unsw.edu.au>
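[Not part of the patch] The recurring transformation inside the fault handlers, as a hedged sketch (hypothetical helper, not the literal patched code): where a handler used to derive the pte lock from a pmd pointer with pte_offset_map_lock(), it now remaps and locks the pte through the pt_path_t saved at build/lookup time.
/* Illustration only: finish a fault by relocking the pte via the saved
 * page table path rather than a pmd pointer. */
static int example_fault_finish(struct mm_struct *mm, pt_path_t pt_path,
		unsigned long address, pte_t entry)
{
	pte_t *page_table;

	page_table = lookup_page_table_fast(mm, pt_path, address);
	if (pte_none(*page_table))	/* nobody raced us; install the new pte */
		set_pte_at(mm, address, page_table, entry);
	unlock_pte(mm, pt_path);
	pte_unmap(page_table);
	return VM_FAULT_MINOR;
}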
---
include/asm-generic/pt-tlb.h | 24 +++++++++++++++
include/asm-generic/tlb.h | 22 +------------
mm/memory.c | 68 ++++++++++++++++++-------------------------
3 files changed, 56 insertions(+), 58 deletions(-)
Index: linux-2.6.17.2/include/asm-generic/pt-tlb.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.17.2/include/asm-generic/pt-tlb.h 2006-07-08 00:46:42.890847200 +1000
@@ -0,0 +1,24 @@
+#ifndef _ASM_GENERIC_PT_TLB_H
+#define _ASM_GENERIC_PT_TLB_H 1
+
+#define pte_free_tlb(tlb, ptep) \
+ do { \
+ tlb->need_flush = 1; \
+ __pte_free_tlb(tlb, ptep); \
+ } while (0)
+
+#ifndef __ARCH_HAS_4LEVEL_HACK
+#define pud_free_tlb(tlb, pudp) \
+ do { \
+ tlb->need_flush = 1; \
+ __pud_free_tlb(tlb, pudp); \
+ } while (0)
+#endif
+
+#define pmd_free_tlb(tlb, pmdp) \
+ do { \
+ tlb->need_flush = 1; \
+ __pmd_free_tlb(tlb, pmdp); \
+ } while (0)
+
+#endif
Index: linux-2.6.17.2/include/asm-generic/tlb.h
===================================================================
--- linux-2.6.17.2.orig/include/asm-generic/tlb.h 2006-07-08 00:46:34.233163368 +1000
+++ linux-2.6.17.2/include/asm-generic/tlb.h 2006-07-08 00:46:42.890847200 +1000
@@ -124,26 +124,8 @@
__tlb_remove_tlb_entry(tlb, ptep, address); \
} while (0)
-#define pte_free_tlb(tlb, ptep) \
- do { \
- tlb->need_flush = 1; \
- __pte_free_tlb(tlb, ptep); \
- } while (0)
-
-#ifndef __ARCH_HAS_4LEVEL_HACK
-#define pud_free_tlb(tlb, pudp) \
- do { \
- tlb->need_flush = 1; \
- __pud_free_tlb(tlb, pudp); \
- } while (0)
-#endif
-
-#define pmd_free_tlb(tlb, pmdp) \
- do { \
- tlb->need_flush = 1; \
- __pmd_free_tlb(tlb, pmdp); \
- } while (0)
-
#define tlb_migrate_finish(mm) do {} while (0)
+#include <asm-generic/pt-tlb.h>
+
#endif /* _ASM_GENERIC__TLB_H */
Index: linux-2.6.17.2/mm/memory.c
===================================================================
--- linux-2.6.17.2.orig/mm/memory.c 2006-07-08 00:46:42.857852216 +1000
+++ linux-2.6.17.2/mm/memory.c 2006-07-08 01:04:37.485483984 +1000
@@ -1210,6 +1210,7 @@
* but allow concurrent faults), with pte both mapped and locked.
* We return with mmap_sem still held, but pte unmapped and unlocked.
*/
+
static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
spinlock_t *ptl, pte_t orig_pte)
@@ -1738,11 +1739,10 @@
* We return with mmap_sem still held, but pte unmapped and unlocked.
*/
static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long address, pte_t *page_table, pmd_t *pmd,
+ unsigned long address, pte_t *page_table, pt_path_t pt_path,
int write_access)
{
struct page *page;
- spinlock_t *ptl;
pte_t entry;
if (write_access) {
@@ -1758,7 +1758,7 @@
entry = mk_pte(page, vma->vm_page_prot);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
- page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+ page_table = lookup_page_table_fast(mm, pt_path, address);
if (!pte_none(*page_table))
goto release;
inc_mm_counter(mm, anon_rss);
@@ -1770,8 +1770,8 @@
page_cache_get(page);
entry = mk_pte(page, vma->vm_page_prot);
- ptl = pte_lockptr(mm, pmd);
- spin_lock(ptl);
+ lock_pte(mm, pt_path);
+
if (!pte_none(*page_table))
goto release;
inc_mm_counter(mm, file_rss);
@@ -1784,7 +1784,8 @@
update_mmu_cache(vma, address, entry);
lazy_mmu_prot_update(entry);
unlock:
- pte_unmap_unlock(page_table, ptl);
+ unlock_pte(mm, pt_path);
+ pte_unmap(page_table);
return VM_FAULT_MINOR;
release:
page_cache_release(page);
@@ -1807,10 +1808,9 @@
* We return with mmap_sem still held, but pte unmapped and unlocked.
*/
static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long address, pte_t *page_table, pmd_t *pmd,
+ unsigned long address, pte_t *page_table, pt_path_t pt_path,
int write_access)
{
- spinlock_t *ptl;
struct page *new_page;
struct address_space *mapping = NULL;
pte_t entry;
@@ -1859,14 +1859,17 @@
anon = 1;
}
- page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+ page_table = lookup_page_table_fast(mm, pt_path, address);
+
/*
* For a file-backed vma, someone could have truncated or otherwise
* invalidated this page. If unmap_mapping_range got called,
* retry getting the page.
*/
if (mapping && unlikely(sequence != mapping->truncate_count)) {
- pte_unmap_unlock(page_table, ptl);
+ unlock_pte(mm, pt_path);
+ pte_unmap(page_table);
+
page_cache_release(new_page);
cond_resched();
sequence = mapping->truncate_count;
@@ -1909,7 +1912,8 @@
update_mmu_cache(vma, address, entry);
lazy_mmu_prot_update(entry);
unlock:
- pte_unmap_unlock(page_table, ptl);
+ unlock_pte(mm, pt_path);
+ pte_unmap(page_table);
return ret;
oom:
page_cache_release(new_page);
@@ -1926,13 +1930,13 @@
* We return with mmap_sem still held, but pte unmapped and unlocked.
*/
static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long address, pte_t *page_table, pmd_t *pmd,
+ unsigned long address, pte_t *page_table, pt_path_t pt_path,
int write_access, pte_t orig_pte)
{
pgoff_t pgoff;
int err;
- if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
+ if (!pte_unmap_same(mm, pt_path, page_table, orig_pte))
return VM_FAULT_MINOR;
if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
@@ -1969,36 +1973,35 @@
*/
static inline int handle_pte_fault(struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address,
- pte_t *pte, pmd_t *pmd, int write_access)
+ pte_t *pte, pt_path_t pt_path, int write_access)
{
pte_t entry;
pte_t old_entry;
- spinlock_t *ptl;
old_entry = entry = *pte;
if (!pte_present(entry)) {
if (pte_none(entry)) {
if (!vma->vm_ops || !vma->vm_ops->nopage)
return do_anonymous_page(mm, vma, address,
- pte, pmd, write_access);
+ pte, pt_path, write_access);
return do_no_page(mm, vma, address,
- pte, pmd, write_access);
+ pte, pt_path, write_access);
}
if (pte_file(entry))
return do_file_page(mm, vma, address,
- pte, pmd, write_access, entry);
+ pte, pt_path, write_access, entry);
return do_swap_page(mm, vma, address,
- pte, pmd, write_access, entry);
+ pte, pt_path, write_access, entry);
}
- ptl = pte_lockptr(mm, pmd);
- spin_lock(ptl);
+ lock_pte(mm, pt_path);
+
if (unlikely(!pte_same(*pte, entry)))
goto unlock;
if (write_access) {
if (!pte_write(entry))
return do_wp_page(mm, vma, address,
- pte, pmd, ptl, entry);
+ pte, pt_path, entry);
entry = pte_mkdirty(entry);
}
entry = pte_mkyoung(entry);
@@ -2017,7 +2020,8 @@
flush_tlb_page(vma, address);
}
unlock:
- pte_unmap_unlock(pte, ptl);
+ unlock_pte(mm, pt_path);
+ pte_unmap(pte);
return VM_FAULT_MINOR;
}
@@ -2027,30 +2031,18 @@
int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, int write_access)
{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
pte_t *pte;
+ pt_path_t pt_path;
__set_current_state(TASK_RUNNING);
inc_page_state(pgfault);
- if (unlikely(is_vm_hugetlb_page(vma)))
- return hugetlb_fault(mm, vma, address, write_access);
-
- pgd = pgd_offset(mm, address);
- pud = pud_alloc(mm, pgd, address);
- if (!pud)
- return VM_FAULT_OOM;
- pmd = pmd_alloc(mm, pud, address);
- if (!pmd)
- return VM_FAULT_OOM;
- pte = pte_alloc_map(mm, pmd, address);
+ pte = build_page_table(mm, address, &pt_path);
if (!pte)
return VM_FAULT_OOM;
- return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
+ return handle_pte_fault(mm, vma, address, pte, pt_path, write_access);
}
EXPORT_SYMBOL_GPL(__handle_mm_fault);
--
* [PATCH 8/18] PTI - Page fault handler
2006-07-13 4:26 [PATCH 0/18] PTI - Explanation Paul Davies
` (6 preceding siblings ...)
2006-07-13 4:27 ` [PATCH 7/18] PTI - Page fault handler Paul Davies
@ 2006-07-13 4:27 ` Paul Davies
2006-07-13 4:28 ` [PATCH 9/18] PTI - Call interface Paul Davies
` (9 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Paul Davies @ 2006-07-13 4:27 UTC (permalink / raw)
To: linux-mm; +Cc: Paul Davies
1) Continues calling the PTI interface to abstract page table dependent
calls from the page fault handler functions.
2) Removes get_locked_pte from memory.c; its callers now use build_page_table.
3) Calls lookup_page_table in filemap_xip.c.
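For callers that used get_locked_pte(), the intended replacement pattern
looks roughly like this (a sketch with a hypothetical set_one_pte() helper,
not code taken from the patch):

/* Sketch: build the path to a pte, lock it, update it, release it. */
static int set_one_pte(struct mm_struct *mm, unsigned long addr)
{
        pte_t *pte;
        pt_path_t pt_path;

        pte = build_page_table(mm, addr, &pt_path); /* allocates missing levels */
        if (!pte)
                return -ENOMEM;
        lock_pte(mm, pt_path);   /* the locking that get_locked_pte() did implicitly */
        /* ... install or update the entry at *pte ... */
        unlock_pte(mm, pt_path);
        pte_unmap(pte);
        return 0;
}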
Signed-Off-By: Paul Davies <pauld@gelato.unsw.edu.au>
---
include/linux/mm.h | 2 -
mm/filemap_xip.c | 26 +++++++++++++++++++---
mm/memory.c | 62 +++++++++++++++++++++--------------------------------
3 files changed, 48 insertions(+), 42 deletions(-)
Index: linux-2.6.17.2/mm/memory.c
===================================================================
--- linux-2.6.17.2.orig/mm/memory.c 2006-07-08 20:35:44.823860064 +1000
+++ linux-2.6.17.2/mm/memory.c 2006-07-08 20:38:43.631677144 +1000
@@ -934,18 +934,6 @@
return err;
}
-pte_t * fastcall get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)
-{
- pgd_t * pgd = pgd_offset(mm, addr);
- pud_t * pud = pud_alloc(mm, pgd, addr);
- if (pud) {
- pmd_t * pmd = pmd_alloc(mm, pud, addr);
- if (pmd)
- return pte_alloc_map_lock(mm, pmd, addr, ptl);
- }
- return NULL;
-}
-
/*
* This is the old fallback for page remapping.
*
@@ -957,14 +945,15 @@
{
int retval;
pte_t *pte;
- spinlock_t *ptl;
+ pt_path_t pt_path;
retval = -EINVAL;
if (PageAnon(page))
goto out;
retval = -ENOMEM;
flush_dcache_page(page);
- pte = get_locked_pte(mm, addr, &ptl);
+ pte = build_page_table(mm, addr, &pt_path);
+ lock_pte(mm, pt_path);
if (!pte)
goto out;
retval = -EBUSY;
@@ -979,7 +967,8 @@
retval = 0;
out_unlock:
- pte_unmap_unlock(pte, ptl);
+ unlock_pte(mm, pt_path);
+ pte_unmap(pte);
out:
return retval;
}
@@ -1136,17 +1125,13 @@
* (but do_wp_page is only called after already making such a check;
* and do_anonymous_page and do_no_page can safely check later on).
*/
-static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
+static inline int pte_unmap_same(struct mm_struct *mm, pt_path_t pt_path,
pte_t *page_table, pte_t orig_pte)
{
int same = 1;
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
- if (sizeof(pte_t) > sizeof(unsigned long)) {
- spinlock_t *ptl = pte_lockptr(mm, pmd);
- spin_lock(ptl);
- same = pte_same(*page_table, orig_pte);
- spin_unlock(ptl);
- }
+ if (sizeof(pte_t) > sizeof(unsigned long))
+ same = atomic_pte_same(mm, page_table, orig_pte, pt_path);
#endif
pte_unmap(page_table);
return same;
@@ -1210,10 +1195,9 @@
* but allow concurrent faults), with pte both mapped and locked.
* We return with mmap_sem still held, but pte unmapped and unlocked.
*/
-
static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long address, pte_t *page_table, pmd_t *pmd,
- spinlock_t *ptl, pte_t orig_pte)
+ unsigned long address, pte_t *page_table, pt_path_t pt_path,
+ pte_t orig_pte)
{
struct page *old_page, *new_page;
pte_t entry;
@@ -1243,7 +1227,8 @@
*/
page_cache_get(old_page);
gotten:
- pte_unmap_unlock(page_table, ptl);
+ unlock_pte(mm, pt_path);
+ pte_unmap(page_table);
if (unlikely(anon_vma_prepare(vma)))
goto oom;
@@ -1261,7 +1246,7 @@
/*
* Re-check the pte - we dropped the lock
*/
- page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+ page_table = lookup_page_table_fast(mm, pt_path, address);
if (likely(pte_same(*page_table, orig_pte))) {
if (old_page) {
page_remove_rmap(old_page);
@@ -1289,7 +1274,8 @@
if (old_page)
page_cache_release(old_page);
unlock:
- pte_unmap_unlock(page_table, ptl);
+ unlock_pte(mm, pt_path);
+ pte_unmap(page_table);
return ret;
oom:
if (old_page)
@@ -1638,16 +1624,15 @@
* We return with mmap_sem still held, but pte unmapped and unlocked.
*/
static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long address, pte_t *page_table, pmd_t *pmd,
+ unsigned long address, pte_t *page_table, pt_path_t pt_path,
int write_access, pte_t orig_pte)
{
- spinlock_t *ptl;
struct page *page;
swp_entry_t entry;
pte_t pte;
int ret = VM_FAULT_MINOR;
- if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
+ if (!pte_unmap_same(mm, pt_path, page_table, orig_pte))
goto out;
entry = pte_to_swp_entry(orig_pte);
@@ -1661,7 +1646,7 @@
* Back out if somebody else faulted in this pte
* while we released the pte lock.
*/
- page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+ page_table = lookup_page_table_fast(mm, pt_path, address);
if (likely(pte_same(*page_table, orig_pte)))
ret = VM_FAULT_OOM;
goto unlock;
@@ -1685,7 +1670,7 @@
/*
* Back out if somebody else already faulted in this pte.
*/
- page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+ page_table = lookup_page_table_fast(mm, pt_path, address);
if (unlikely(!pte_same(*page_table, orig_pte)))
goto out_nomap;
@@ -1713,8 +1698,8 @@
unlock_page(page);
if (write_access) {
- if (do_wp_page(mm, vma, address,
- page_table, pmd, ptl, pte) == VM_FAULT_OOM)
+ if (do_wp_page(mm, vma, address,
+ page_table, pt_path, pte) == VM_FAULT_OOM)
ret = VM_FAULT_OOM;
goto out;
}
@@ -1723,11 +1708,14 @@
update_mmu_cache(vma, address, pte);
lazy_mmu_prot_update(pte);
unlock:
- pte_unmap_unlock(page_table, ptl);
+ unlock_pte(mm, pt_path);
+ pte_unmap(page_table);
out:
return ret;
out_nomap:
- pte_unmap_unlock(page_table, ptl);
+ unlock_pte(mm, pt_path);
+ pte_unmap(page_table);
+
unlock_page(page);
page_cache_release(page);
return ret;
Index: linux-2.6.17.2/mm/filemap_xip.c
===================================================================
--- linux-2.6.17.2.orig/mm/filemap_xip.c 2006-07-08 20:35:44.823860064 +1000
+++ linux-2.6.17.2/mm/filemap_xip.c 2006-07-08 20:35:46.151658208 +1000
@@ -16,6 +16,8 @@
#include <asm/tlbflush.h>
#include "filemap.h"
+#include <linux/pt.h>
+
/*
* This is a file read routine for execute in place files, and uses
* the mapping->a_ops->get_xip_page() function for the actual low-level
@@ -174,7 +176,7 @@
unsigned long address;
pte_t *pte;
pte_t pteval;
- spinlock_t *ptl;
+ pt_path_t pt_path;
struct page *page;
spin_lock(&mapping->i_mmap_lock);
@@ -184,7 +186,23 @@
((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
BUG_ON(address < vma->vm_start || address >= vma->vm_end);
page = ZERO_PAGE(address);
- pte = page_check_address(page, mm, address, &ptl);
+ pte = lookup_page_table(mm, address, &pt_path);
+ if (!pte)
+ goto out;
+
+ /* Make a quick check before getting the lock */
+ if (!pte_present(*pte)) {
+ pte_unmap(pte);
+ goto out;
+ }
+
+ lock_pte(mm, pt_path);
+ if (!(pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte))) {
+ unlock_pte(mm, pt_path);
+ pte_unmap(pte);
+ goto out;
+ }
+
if (pte) {
/* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pte));
@@ -192,10 +210,12 @@
page_remove_rmap(page);
dec_mm_counter(mm, file_rss);
BUG_ON(pte_dirty(pteval));
- pte_unmap_unlock(pte, ptl);
+ unlock_pte(mm, pt_path);
+ pte_unmap(pte);
page_cache_release(page);
}
}
+out:
spin_unlock(&mapping->i_mmap_lock);
}
Index: linux-2.6.17.2/include/linux/mm.h
===================================================================
--- linux-2.6.17.2.orig/include/linux/mm.h 2006-07-08 20:35:44.899848512 +1000
+++ linux-2.6.17.2/include/linux/mm.h 2006-07-08 20:35:46.153657904 +1000
@@ -796,8 +796,6 @@
unsigned long private_dirty;
};
-extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl));
-
#include <linux/pt-mm.h>
extern void free_area_init(unsigned long * zones_size);
--
* [PATCH 9/18] PTI - Call interface
2006-07-13 4:26 [PATCH 0/18] PTI - Explanation Paul Davies
` (7 preceding siblings ...)
2006-07-13 4:27 ` [PATCH 8/18] " Paul Davies
@ 2006-07-13 4:28 ` Paul Davies
2006-07-13 4:28 ` [PATCH 10/18] PTI - Copy iterator abstraction Paul Davies
` (8 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Paul Davies @ 2006-07-13 4:28 UTC (permalink / raw)
To: linux-mm; +Cc: Paul Davies
Calls PTI across various files.
* memory.c - follow_page calls lookup_page_table
- get_user_pages calls lookup_gate_area
- vmalloc_to_page calls lookup_page_table
* fs/exec.c - makes calls to build_page_table
* rmap.c - calls lookup_page_table and absorbs page_check_address
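The page_check_address() logic that rmap.c absorbs reduces to the following
shape (a sketch assembled from the hunks below; mm, address, page and
pt_path are declared as in those hunks):

pte = lookup_page_table(mm, address, &pt_path);
if (!pte)
        goto out;

/* Quick check before taking the pte lock. */
if (!pte_present(*pte)) {
        pte_unmap(pte);
        goto out;
}

lock_pte(mm, pt_path);
if (!(pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte))) {
        unlock_pte(mm, pt_path);
        pte_unmap(pte);
        goto out;
}
/* Here the pte is mapped, locked, and maps the expected page. */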
Signed-Off-By: Paul Davies <pauld@gelato.unsw.edu.au>
---
fs/exec.c | 14 ++++++---
mm/fremap.c | 18 ++++++++----
mm/memory.c | 73 ++++++++++--------------------------------------
mm/rmap.c | 90 ++++++++++++++++++++++++------------------------------------
4 files changed, 73 insertions(+), 122 deletions(-)
Index: linux-2.6.17.2/mm/memory.c
===================================================================
--- linux-2.6.17.2.orig/mm/memory.c 2006-07-08 22:25:23.651317624 +1000
+++ linux-2.6.17.2/mm/memory.c 2006-07-08 22:25:25.380054816 +1000
@@ -656,40 +656,17 @@
struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
unsigned int flags)
{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
pte_t *ptep, pte;
- spinlock_t *ptl;
+ pt_path_t pt_path;
struct page *page;
struct mm_struct *mm = vma->vm_mm;
- page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
- if (!IS_ERR(page)) {
- BUG_ON(flags & FOLL_GET);
- goto out;
- }
-
page = NULL;
- pgd = pgd_offset(mm, address);
- if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
- goto no_page_table;
-
- pud = pud_offset(pgd, address);
- if (pud_none(*pud) || unlikely(pud_bad(*pud)))
- goto no_page_table;
-
- pmd = pmd_offset(pud, address);
- if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+ ptep = lookup_page_table(mm, address, &pt_path);
+ if (!ptep)
goto no_page_table;
- if (pmd_huge(*pmd)) {
- BUG_ON(flags & FOLL_GET);
- page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
- goto out;
- }
-
- ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+ lock_pte(mm, pt_path);
if (!ptep)
goto out;
@@ -711,7 +688,8 @@
mark_page_accessed(page);
}
unlock:
- pte_unmap_unlock(ptep, ptl);
+ unlock_pte(mm, pt_path);
+ pte_unmap(ptep);
out:
return page;
@@ -752,23 +730,10 @@
if (!vma && in_gate_area(tsk, start)) {
unsigned long pg = start & PAGE_MASK;
struct vm_area_struct *gate_vma = get_gate_vma(tsk);
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
pte_t *pte;
if (write) /* user gate pages are read-only */
return i ? : -EFAULT;
- if (pg > TASK_SIZE)
- pgd = pgd_offset_k(pg);
- else
- pgd = pgd_offset_gate(mm, pg);
- BUG_ON(pgd_none(*pgd));
- pud = pud_offset(pgd, pg);
- BUG_ON(pud_none(*pud));
- pmd = pmd_offset(pud, pg);
- if (pmd_none(*pmd))
- return i ? : -EFAULT;
- pte = pte_offset_map(pmd, pg);
+ pte = lookup_gate_area(mm, pg);
if (pte_none(*pte)) {
pte_unmap(pte);
return i ? : -EFAULT;
@@ -855,6 +820,7 @@
} while (len);
return i;
}
+
EXPORT_SYMBOL(get_user_pages);
static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
@@ -2061,23 +2027,14 @@
{
unsigned long addr = (unsigned long) vmalloc_addr;
struct page *page = NULL;
- pgd_t *pgd = pgd_offset_k(addr);
- pud_t *pud;
- pmd_t *pmd;
pte_t *ptep, pte;
-
- if (!pgd_none(*pgd)) {
- pud = pud_offset(pgd, addr);
- if (!pud_none(*pud)) {
- pmd = pmd_offset(pud, addr);
- if (!pmd_none(*pmd)) {
- ptep = pte_offset_map(pmd, addr);
- pte = *ptep;
- if (pte_present(pte))
- page = pte_page(pte);
- pte_unmap(ptep);
- }
- }
+
+ ptep = lookup_page_table(&init_mm, addr, NULL);
+ if (ptep) {
+ pte = *ptep;
+ if (pte_present(pte))
+ page = pte_page(pte);
+ pte_unmap(ptep);
}
return page;
}
Index: linux-2.6.17.2/fs/exec.c
===================================================================
--- linux-2.6.17.2.orig/fs/exec.c 2006-07-08 22:25:23.652317472 +1000
+++ linux-2.6.17.2/fs/exec.c 2006-07-08 22:25:25.381054664 +1000
@@ -49,6 +49,7 @@
#include <linux/rmap.h>
#include <linux/acct.h>
#include <linux/cn_proc.h>
+#include <linux/pt.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -307,17 +308,20 @@
{
struct mm_struct *mm = vma->vm_mm;
pte_t * pte;
- spinlock_t *ptl;
+ pt_path_t pt_path;
if (unlikely(anon_vma_prepare(vma)))
goto out;
flush_dcache_page(page);
- pte = get_locked_pte(mm, address, &ptl);
+ pte = build_page_table(mm, address, &pt_path);
+ lock_pte(mm, pt_path);
+
if (!pte)
goto out;
if (!pte_none(*pte)) {
- pte_unmap_unlock(pte, ptl);
+ unlock_pte(mm, pt_path);
+ pte_unmap(pte);
goto out;
}
inc_mm_counter(mm, anon_rss);
@@ -325,8 +329,8 @@
set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte(
page, vma->vm_page_prot))));
page_add_new_anon_rmap(page, vma, address);
- pte_unmap_unlock(pte, ptl);
-
+ unlock_pte(mm, pt_path);
+ pte_unmap(pte);
/* no need for flush_tlb */
return;
out:
Index: linux-2.6.17.2/mm/fremap.c
===================================================================
--- linux-2.6.17.2.orig/mm/fremap.c 2006-07-08 22:25:23.651317624 +1000
+++ linux-2.6.17.2/mm/fremap.c 2006-07-08 22:25:25.381054664 +1000
@@ -15,6 +15,7 @@
#include <linux/rmap.h>
#include <linux/module.h>
#include <linux/syscalls.h>
+#include <linux/pt.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
@@ -56,9 +57,10 @@
int err = -ENOMEM;
pte_t *pte;
pte_t pte_val;
- spinlock_t *ptl;
+ pt_path_t pt_path;
- pte = get_locked_pte(mm, addr, &ptl);
+ pte = build_page_table(mm, addr, &pt_path);
+ lock_pte(mm, pt_path);
if (!pte)
goto out;
@@ -85,7 +87,9 @@
update_mmu_cache(vma, addr, pte_val);
err = 0;
unlock:
- pte_unmap_unlock(pte, ptl);
+ unlock_pte(mm, pt_path);
+ pte_unmap(pte);
+
out:
return err;
}
@@ -101,9 +105,10 @@
int err = -ENOMEM;
pte_t *pte;
pte_t pte_val;
- spinlock_t *ptl;
+ pt_path_t pt_path;
- pte = get_locked_pte(mm, addr, &ptl);
+ pte = build_page_table(mm, addr, &pt_path);
+ lock_pte(mm, pt_path);
if (!pte)
goto out;
@@ -115,7 +120,8 @@
set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff));
pte_val = *pte;
update_mmu_cache(vma, addr, pte_val);
- pte_unmap_unlock(pte, ptl);
+ unlock_pte(mm, pt_path);
+ pte_unmap(pte);
err = 0;
out:
return err;
Index: linux-2.6.17.2/mm/rmap.c
===================================================================
--- linux-2.6.17.2.orig/mm/rmap.c 2006-07-08 22:25:23.652317472 +1000
+++ linux-2.6.17.2/mm/rmap.c 2006-07-08 22:28:41.974167968 +1000
@@ -53,6 +53,7 @@
#include <linux/rmap.h>
#include <linux/rcupdate.h>
#include <linux/module.h>
+#include <linux/pt.h>
#include <asm/tlbflush.h>
@@ -281,49 +282,6 @@
}
/*
- * Check that @page is mapped at @address into @mm.
- *
- * On success returns with pte mapped and locked.
- */
-pte_t *page_check_address(struct page *page, struct mm_struct *mm,
- unsigned long address, spinlock_t **ptlp)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- spinlock_t *ptl;
-
- pgd = pgd_offset(mm, address);
- if (!pgd_present(*pgd))
- return NULL;
-
- pud = pud_offset(pgd, address);
- if (!pud_present(*pud))
- return NULL;
-
- pmd = pmd_offset(pud, address);
- if (!pmd_present(*pmd))
- return NULL;
-
- pte = pte_offset_map(pmd, address);
- /* Make a quick check before getting the lock */
- if (!pte_present(*pte)) {
- pte_unmap(pte);
- return NULL;
- }
-
- ptl = pte_lockptr(mm, pmd);
- spin_lock(ptl);
- if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
- *ptlp = ptl;
- return pte;
- }
- pte_unmap_unlock(pte, ptl);
- return NULL;
-}
-
-/*
* Subfunctions of page_referenced: page_referenced_one called
* repeatedly from either page_referenced_anon or page_referenced_file.
*/
@@ -333,16 +291,29 @@
struct mm_struct *mm = vma->vm_mm;
unsigned long address;
pte_t *pte;
- spinlock_t *ptl;
+ pt_path_t pt_path;
int referenced = 0;
address = vma_address(page, vma);
if (address == -EFAULT)
goto out;
- pte = page_check_address(page, mm, address, &ptl);
- if (!pte)
+ pte = lookup_page_table(mm, address, &pt_path);
+ if (!pte)
+ goto out;
+
+ /* Make a quick check before getting the lock */
+ if (!pte_present(*pte)) {
+ pte_unmap(pte);
+ goto out;
+ }
+
+ lock_pte(mm, pt_path);
+ if (!(pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte))) {
+ unlock_pte(mm, pt_path);
+ pte_unmap(pte);
goto out;
+ }
if (ptep_clear_flush_young(vma, address, pte))
referenced++;
@@ -354,7 +325,8 @@
referenced++;
(*mapcount)--;
- pte_unmap_unlock(pte, ptl);
+ unlock_pte(mm, pt_path);
+ pte_unmap(pte);
out:
return referenced;
}
@@ -583,17 +555,30 @@
struct mm_struct *mm = vma->vm_mm;
unsigned long address;
pte_t *pte;
+ pt_path_t pt_path;
pte_t pteval;
- spinlock_t *ptl;
int ret = SWAP_AGAIN;
address = vma_address(page, vma);
if (address == -EFAULT)
goto out;
- pte = page_check_address(page, mm, address, &ptl);
- if (!pte)
+ pte = lookup_page_table(mm, address, &pt_path);
+ if (!pte)
+ goto out;
+
+ /* Make a quick check before getting the lock */
+ if (!pte_present(*pte)) {
+ pte_unmap(pte);
+ goto out;
+ }
+
+ lock_pte(mm, pt_path);
+ if (!(pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte))) {
+ unlock_pte(mm, pt_path);
+ pte_unmap(pte);
goto out;
+ }
/*
* If the page is mlock()d, we cannot swap it out.
@@ -642,7 +627,8 @@
page_cache_release(page);
out_unmap:
- pte_unmap_unlock(pte, ptl);
+ unlock_pte(mm, pt_path);
+ pte_unmap(pte);
out:
return ret;
}
@@ -666,8 +652,6 @@
* there there won't be many ptes located within the scan cluster. In this case
* maybe we could scan further - to the end of the pte page, perhaps.
*/
-#define CLUSTER_SIZE min(32*PAGE_SIZE, PMD_SIZE)
-#define CLUSTER_MASK (~(CLUSTER_SIZE - 1))
static void try_to_unmap_cluster(unsigned long cursor,
unsigned int *mapcount, struct vm_area_struct *vma)
--
* [PATCH 10/18] PTI - Copy iterator abstraction
2006-07-13 4:26 [PATCH 0/18] PTI - Explanation Paul Davies
` (8 preceding siblings ...)
2006-07-13 4:28 ` [PATCH 9/18] PTI - Call interface Paul Davies
@ 2006-07-13 4:28 ` Paul Davies
2006-07-13 4:28 ` [PATCH 11/18] PTI - Unmap page range abstraction Paul Davies
` (7 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Paul Davies @ 2006-07-13 4:28 UTC (permalink / raw)
To: linux-mm; +Cc: Paul Davies
Abstracts the copy_page_range iterator from memory.c to pt-default.c.
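With the walk moved, the caller in memory.c becomes a thin wrapper; a sketch
of the resulting copy_page_range() (the hugetlb special case stays where it
was; other early-outs are elided here):

int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                    struct vm_area_struct *vma)
{
        unsigned long addr = vma->vm_start;
        unsigned long end = vma->vm_end;

        if (is_vm_hugetlb_page(vma))
                return copy_hugetlb_page_range(dst_mm, src_mm, vma);

        /* The pgd/pud/pmd/pte walk now lives in mm/pt-default.c. */
        return copy_dual_iterator(dst_mm, src_mm, addr, end, vma);
}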
Signed-Off-By: Paul Davies <pauld@gelato.unsw.edu.au>
---
memory.c | 108 +------------------------------------------------------
pt-default.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 116 insertions(+), 106 deletions(-)
Index: linux-2.6.17.2/mm/pt-default.c
===================================================================
--- linux-2.6.17.2.orig/mm/pt-default.c 2006-07-08 19:37:21.480345200 +1000
+++ linux-2.6.17.2/mm/pt-default.c 2006-07-08 19:39:36.358840552 +1000
@@ -285,3 +285,117 @@
return 0;
}
#endif /* __PAGETABLE_PMD_FOLDED */
+
+static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+ pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end)
+{
+ pte_t *src_pte, *dst_pte;
+ spinlock_t *src_ptl, *dst_ptl;
+ int progress = 0;
+ int rss[2];
+
+again:
+ rss[1] = rss[0] = 0;
+ dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
+ if (!dst_pte)
+ return -ENOMEM;
+ src_pte = pte_offset_map_nested(src_pmd, addr);
+ src_ptl = pte_lockptr(src_mm, src_pmd);
+ spin_lock(src_ptl);
+
+ do {
+ /*
+ * We are holding two locks at this point - either of them
+ * could generate latencies in another task on another CPU.
+ */
+ if (progress >= 32) {
+ progress = 0;
+ if (need_resched() ||
+ need_lockbreak(src_ptl) ||
+ need_lockbreak(dst_ptl))
+ break;
+ }
+ if (pte_none(*src_pte)) {
+ progress++;
+ continue;
+ }
+ copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
+ progress += 8;
+ } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
+
+ spin_unlock(src_ptl);
+ pte_unmap_nested(src_pte - 1);
+ add_mm_rss(dst_mm, rss[0], rss[1]);
+ pte_unmap_unlock(dst_pte - 1, dst_ptl);
+ cond_resched();
+ if (addr != end)
+ goto again;
+ return 0;
+}
+
+static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+ pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end)
+{
+ pmd_t *src_pmd, *dst_pmd;
+ unsigned long next;
+
+ dst_pmd = pmd_alloc(dst_mm, dst_pud, addr);
+ if (!dst_pmd)
+ return -ENOMEM;
+ src_pmd = pmd_offset(src_pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(src_pmd))
+ continue;
+ if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
+ vma, addr, next))
+ return -ENOMEM;
+ } while (dst_pmd++, src_pmd++, addr = next, addr != end);
+ return 0;
+}
+
+static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+ pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end)
+{
+ pud_t *src_pud, *dst_pud;
+ unsigned long next;
+
+ dst_pud = pud_alloc(dst_mm, dst_pgd, addr);
+ if (!dst_pud)
+ return -ENOMEM;
+ src_pud = pud_offset(src_pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(src_pud))
+ continue;
+ if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
+ vma, addr, next))
+ return -ENOMEM;
+ } while (dst_pud++, src_pud++, addr = next, addr != end);
+ return 0;
+}
+
+int copy_dual_iterator(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+ unsigned long addr, unsigned long end, struct vm_area_struct *vma)
+{
+ pgd_t *src_pgd;
+ pgd_t *dst_pgd;
+ unsigned long next;
+
+ dst_pgd = pgd_offset(dst_mm, addr);
+ src_pgd = pgd_offset(src_mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(src_pgd))
+ continue;
+
+ if (copy_pud_range(dst_mm, src_mm, dst_pgd,
+ src_pgd, vma, addr, next))
+ return -ENOMEM;
+
+ } while (dst_pgd++, src_pgd++, addr = next, addr != end);
+ return 0;
+}
Index: linux-2.6.17.2/mm/memory.c
===================================================================
--- linux-2.6.17.2.orig/mm/memory.c 2006-07-08 19:37:21.480345200 +1000
+++ linux-2.6.17.2/mm/memory.c 2006-07-08 19:41:49.099660880 +1000
@@ -193,7 +193,7 @@
* covered by this vma.
*/
-static inline void
+void
copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
unsigned long addr, int *rss)
@@ -246,103 +246,9 @@
set_pte_at(dst_mm, addr, dst_pte, pte);
}
-static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
- pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
- unsigned long addr, unsigned long end)
-{
- pte_t *src_pte, *dst_pte;
- spinlock_t *src_ptl, *dst_ptl;
- int progress = 0;
- int rss[2];
-
-again:
- rss[1] = rss[0] = 0;
- dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
- if (!dst_pte)
- return -ENOMEM;
- src_pte = pte_offset_map_nested(src_pmd, addr);
- src_ptl = pte_lockptr(src_mm, src_pmd);
- spin_lock(src_ptl);
-
- do {
- /*
- * We are holding two locks at this point - either of them
- * could generate latencies in another task on another CPU.
- */
- if (progress >= 32) {
- progress = 0;
- if (need_resched() ||
- need_lockbreak(src_ptl) ||
- need_lockbreak(dst_ptl))
- break;
- }
- if (pte_none(*src_pte)) {
- progress++;
- continue;
- }
- copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
- progress += 8;
- } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
-
- spin_unlock(src_ptl);
- pte_unmap_nested(src_pte - 1);
- add_mm_rss(dst_mm, rss[0], rss[1]);
- pte_unmap_unlock(dst_pte - 1, dst_ptl);
- cond_resched();
- if (addr != end)
- goto again;
- return 0;
-}
-
-static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
- pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
- unsigned long addr, unsigned long end)
-{
- pmd_t *src_pmd, *dst_pmd;
- unsigned long next;
-
- dst_pmd = pmd_alloc(dst_mm, dst_pud, addr);
- if (!dst_pmd)
- return -ENOMEM;
- src_pmd = pmd_offset(src_pud, addr);
- do {
- next = pmd_addr_end(addr, end);
- if (pmd_none_or_clear_bad(src_pmd))
- continue;
- if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
- vma, addr, next))
- return -ENOMEM;
- } while (dst_pmd++, src_pmd++, addr = next, addr != end);
- return 0;
-}
-
-static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
- pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
- unsigned long addr, unsigned long end)
-{
- pud_t *src_pud, *dst_pud;
- unsigned long next;
-
- dst_pud = pud_alloc(dst_mm, dst_pgd, addr);
- if (!dst_pud)
- return -ENOMEM;
- src_pud = pud_offset(src_pgd, addr);
- do {
- next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(src_pud))
- continue;
- if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
- vma, addr, next))
- return -ENOMEM;
- } while (dst_pud++, src_pud++, addr = next, addr != end);
- return 0;
-}
-
int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
struct vm_area_struct *vma)
{
- pgd_t *src_pgd, *dst_pgd;
- unsigned long next;
unsigned long addr = vma->vm_start;
unsigned long end = vma->vm_end;
@@ -360,17 +266,7 @@
if (is_vm_hugetlb_page(vma))
return copy_hugetlb_page_range(dst_mm, src_mm, vma);
- dst_pgd = pgd_offset(dst_mm, addr);
- src_pgd = pgd_offset(src_mm, addr);
- do {
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(src_pgd))
- continue;
- if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
- vma, addr, next))
- return -ENOMEM;
- } while (dst_pgd++, src_pgd++, addr = next, addr != end);
- return 0;
+ return copy_dual_iterator(dst_mm, src_mm, addr, end, vma);
}
static unsigned long zap_pte_range(struct mmu_gather *tlb,
--
* [PATCH 11/18] PTI - Unmap page range abstraction
2006-07-13 4:26 [PATCH 0/18] PTI - Explanation Paul Davies
` (9 preceding siblings ...)
2006-07-13 4:28 ` [PATCH 10/18] PTI - Copy iterator abstraction Paul Davies
@ 2006-07-13 4:28 ` Paul Davies
2006-07-13 4:28 ` [PATCH 12/18] PTI - Zeromap iterator abstraction Paul Davies
` (6 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Paul Davies @ 2006-07-13 4:28 UTC (permalink / raw)
To: linux-mm; +Cc: Paul Davies
Abstracts the unmap_page_range iterator from memory.c to pt-default.c.
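The split leaves the format-independent per-pte work in memory.c as
zap_one_pte() and keeps the multi-level walk in pt-default.c. The inner
loop of the relocated zap_pte_range() is roughly (sketch mirroring the
hunks below):

pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
do {
        /* All of the generic unmap logic now lives in mm/memory.c. */
        zap_one_pte(pte, mm, addr, vma, zap_work, details, tlb,
                    &anon_rss, &file_rss);
} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
add_mm_rss(mm, file_rss, anon_rss);
pte_unmap_unlock(pte - 1, ptl);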
Signed-Off-By: Paul Davies <pauld@gelato.unsw.edu.au>
---
memory.c | 96 +++++++----------------------------------------------------
pt-default.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 100 insertions(+), 83 deletions(-)
Index: linux-2.6.17.2/mm/memory.c
===================================================================
--- linux-2.6.17.2.orig/mm/memory.c 2006-07-08 19:43:34.673611200 +1000
+++ linux-2.6.17.2/mm/memory.c 2006-07-08 19:59:51.146361032 +1000
@@ -269,23 +269,14 @@
return copy_dual_iterator(dst_mm, src_mm, addr, end, vma);
}
-static unsigned long zap_pte_range(struct mmu_gather *tlb,
- struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long addr, unsigned long end,
- long *zap_work, struct zap_details *details)
+void zap_one_pte(pte_t *pte, struct mm_struct *mm, unsigned long addr,
+ struct vm_area_struct *vma, long *zap_work, struct zap_details *details,
+ struct mmu_gather *tlb, int *anon_rss, int* file_rss)
{
- struct mm_struct *mm = tlb->mm;
- pte_t *pte;
- spinlock_t *ptl;
- int file_rss = 0;
- int anon_rss = 0;
-
- pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
- do {
pte_t ptent = *pte;
if (pte_none(ptent)) {
(*zap_work)--;
- continue;
+ return;
}
(*zap_work) -= PAGE_SIZE;
@@ -302,7 +293,7 @@
*/
if (details->check_mapping &&
details->check_mapping != page->mapping)
- continue;
+ return;
/*
* Each page->index must be checked when
* invalidating or truncating nonlinear.
@@ -310,90 +301,40 @@
if (details->nonlinear_vma &&
(page->index < details->first_index ||
page->index > details->last_index))
- continue;
+ return;
}
ptent = ptep_get_and_clear_full(mm, addr, pte,
tlb->fullmm);
tlb_remove_tlb_entry(tlb, pte, addr);
if (unlikely(!page))
- continue;
+ return;
if (unlikely(details) && details->nonlinear_vma
&& linear_page_index(details->nonlinear_vma,
addr) != page->index)
set_pte_at(mm, addr, pte,
pgoff_to_pte(page->index));
if (PageAnon(page))
- anon_rss--;
+ (*anon_rss)--;
else {
if (pte_dirty(ptent))
set_page_dirty(page);
if (pte_young(ptent))
mark_page_accessed(page);
- file_rss--;
+ (*file_rss)--;
}
page_remove_rmap(page);
tlb_remove_page(tlb, page);
- continue;
+ return;
}
/*
* If details->check_mapping, we leave swap entries;
* if details->nonlinear_vma, we leave file entries.
*/
if (unlikely(details))
- continue;
+ return;
if (!pte_file(ptent))
free_swap_and_cache(pte_to_swp_entry(ptent));
pte_clear_full(mm, addr, pte, tlb->fullmm);
- } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
-
- add_mm_rss(mm, file_rss, anon_rss);
- pte_unmap_unlock(pte - 1, ptl);
-
- return addr;
-}
-
-static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
- struct vm_area_struct *vma, pud_t *pud,
- unsigned long addr, unsigned long end,
- long *zap_work, struct zap_details *details)
-{
- pmd_t *pmd;
- unsigned long next;
-
- pmd = pmd_offset(pud, addr);
- do {
- next = pmd_addr_end(addr, end);
- if (pmd_none_or_clear_bad(pmd)) {
- (*zap_work)--;
- continue;
- }
- next = zap_pte_range(tlb, vma, pmd, addr, next,
- zap_work, details);
- } while (pmd++, addr = next, (addr != end && *zap_work > 0));
-
- return addr;
-}
-
-static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
- struct vm_area_struct *vma, pgd_t *pgd,
- unsigned long addr, unsigned long end,
- long *zap_work, struct zap_details *details)
-{
- pud_t *pud;
- unsigned long next;
-
- pud = pud_offset(pgd, addr);
- do {
- next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud)) {
- (*zap_work)--;
- continue;
- }
- next = zap_pmd_range(tlb, vma, pud, addr, next,
- zap_work, details);
- } while (pud++, addr = next, (addr != end && *zap_work > 0));
-
- return addr;
}
static unsigned long unmap_page_range(struct mmu_gather *tlb,
@@ -401,24 +342,13 @@
unsigned long addr, unsigned long end,
long *zap_work, struct zap_details *details)
{
- pgd_t *pgd;
- unsigned long next;
-
if (details && !details->check_mapping && !details->nonlinear_vma)
details = NULL;
BUG_ON(addr >= end);
tlb_start_vma(tlb, vma);
- pgd = pgd_offset(vma->vm_mm, addr);
- do {
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd)) {
- (*zap_work)--;
- continue;
- }
- next = zap_pud_range(tlb, vma, pgd, addr, next,
- zap_work, details);
- } while (pgd++, addr = next, (addr != end && *zap_work > 0));
+
+ addr = unmap_page_range_iterator(tlb, vma, addr, end, zap_work, details);
tlb_end_vma(tlb, vma);
return addr;
Index: linux-2.6.17.2/mm/pt-default.c
===================================================================
--- linux-2.6.17.2.orig/mm/pt-default.c 2006-07-08 19:43:34.672611352 +1000
+++ linux-2.6.17.2/mm/pt-default.c 2006-07-08 19:58:22.610820480 +1000
@@ -399,3 +399,90 @@
} while (dst_pgd++, src_pgd++, addr = next, addr != end);
return 0;
}
+
+static unsigned long zap_pte_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, pmd_t *pmd,
+ unsigned long addr, unsigned long end,
+ long *zap_work, struct zap_details *details)
+{
+ struct mm_struct *mm = tlb->mm;
+ pte_t *pte;
+ spinlock_t *ptl;
+ int file_rss = 0;
+ int anon_rss = 0;
+
+ pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+ do {
+ zap_one_pte(pte, mm, addr, vma, zap_work, details, tlb, &anon_rss, &file_rss);
+ } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
+
+ add_mm_rss(mm, file_rss, anon_rss);
+ pte_unmap_unlock(pte - 1, ptl);
+
+ return addr;
+}
+
+static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, pud_t *pud,
+ unsigned long addr, unsigned long end,
+ long *zap_work, struct zap_details *details)
+{
+ pmd_t *pmd;
+ unsigned long next;
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(pmd)) {
+ (*zap_work)--;
+ continue;
+ }
+ next = zap_pte_range(tlb, vma, pmd, addr, next,
+ zap_work, details);
+ } while (pmd++, addr = next, (addr != end && *zap_work > 0));
+
+ return addr;
+}
+
+static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, pgd_t *pgd,
+ unsigned long addr, unsigned long end,
+ long *zap_work, struct zap_details *details)
+{
+ pud_t *pud;
+ unsigned long next;
+
+ pud = pud_offset(pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud)) {
+ (*zap_work)--;
+ continue;
+ }
+ next = zap_pmd_range(tlb, vma, pud, addr, next,
+ zap_work, details);
+ } while (pud++, addr = next, (addr != end && *zap_work > 0));
+
+ return addr;
+}
+
+unsigned long unmap_page_range_iterator(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, unsigned long addr, unsigned long end,
+ long *zap_work, struct zap_details *details)
+{
+ pgd_t *pgd;
+ unsigned long next;
+
+ pgd = pgd_offset(vma->vm_mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd)) {
+ (*zap_work)--;
+ continue;
+ }
+ next = zap_pud_range(tlb, vma, pgd, addr, next, zap_work,
+ details);
+ } while (pgd++, addr = next, (addr != end && *zap_work > 0));
+
+ return addr;
+}
--
* [PATCH 12/18] PTI - Zeromap iterator abstraction
2006-07-13 4:26 [PATCH 0/18] PTI - Explanation Paul Davies
` (10 preceding siblings ...)
2006-07-13 4:28 ` [PATCH 11/18] PTI - Unmap page range abstraction Paul Davies
@ 2006-07-13 4:28 ` Paul Davies
2006-07-13 4:28 ` [PATCH 13/18] PTI - Msync " Paul Davies
` (5 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Paul Davies @ 2006-07-13 4:28 UTC (permalink / raw)
To: linux-mm; +Cc: Paul Davies
1) Abstracts the zeromap_page_range iterator from memory.c to pt-default.c.
2) Adds the remap_pfn_range walk (remap_build_iterator and its helpers) to pt-default.c.
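On the memory.c side the zeromap function body shrinks to a call into the
iterator; a sketch of the resulting zeromap_page_range():

int zeromap_page_range(struct vm_area_struct *vma,
                       unsigned long addr, unsigned long size, pgprot_t prot)
{
        unsigned long end = addr + size;
        struct mm_struct *mm = vma->vm_mm;

        BUG_ON(addr >= end);
        flush_cache_range(vma, addr, end);
        /* The multi-level walk is zeromap_build_iterator() in pt-default.c,
         * which calls zeromap_one_pte() for each entry. */
        return zeromap_build_iterator(mm, addr, end, prot);
}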
Signed-Off-By: Paul Davies <pauld@gelato.unsw.edu.au>
---
memory.c | 71 +++-------------------------
pt-default.c | 146 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 155 insertions(+), 62 deletions(-)
Index: linux-2.6.17.2/mm/memory.c
===================================================================
--- linux-2.6.17.2.orig/mm/memory.c 2006-07-08 20:38:57.812521328 +1000
+++ linux-2.6.17.2/mm/memory.c 2006-07-08 20:46:15.496234144 +1000
@@ -649,80 +649,27 @@
EXPORT_SYMBOL(get_user_pages);
-static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
- unsigned long addr, unsigned long end, pgprot_t prot)
+void zeromap_one_pte(struct mm_struct *mm, pte_t *pte, unsigned long addr, pgprot_t prot)
{
- pte_t *pte;
- spinlock_t *ptl;
-
- pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
- if (!pte)
- return -ENOMEM;
- do {
- struct page *page = ZERO_PAGE(addr);
- pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
- page_cache_get(page);
- page_add_file_rmap(page);
- inc_mm_counter(mm, file_rss);
- BUG_ON(!pte_none(*pte));
- set_pte_at(mm, addr, pte, zero_pte);
- } while (pte++, addr += PAGE_SIZE, addr != end);
- pte_unmap_unlock(pte - 1, ptl);
- return 0;
-}
-
-static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
- unsigned long addr, unsigned long end, pgprot_t prot)
-{
- pmd_t *pmd;
- unsigned long next;
-
- pmd = pmd_alloc(mm, pud, addr);
- if (!pmd)
- return -ENOMEM;
- do {
- next = pmd_addr_end(addr, end);
- if (zeromap_pte_range(mm, pmd, addr, next, prot))
- return -ENOMEM;
- } while (pmd++, addr = next, addr != end);
- return 0;
-}
-
-static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
- unsigned long addr, unsigned long end, pgprot_t prot)
-{
- pud_t *pud;
- unsigned long next;
-
- pud = pud_alloc(mm, pgd, addr);
- if (!pud)
- return -ENOMEM;
- do {
- next = pud_addr_end(addr, end);
- if (zeromap_pmd_range(mm, pud, addr, next, prot))
- return -ENOMEM;
- } while (pud++, addr = next, addr != end);
- return 0;
+ struct page *page = ZERO_PAGE(addr);
+ pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
+ page_cache_get(page);
+ page_add_file_rmap(page);
+ inc_mm_counter(mm, file_rss);
+ BUG_ON(!pte_none(*pte));
+ set_pte_at(mm, addr, pte, zero_pte);
}
int zeromap_page_range(struct vm_area_struct *vma,
unsigned long addr, unsigned long size, pgprot_t prot)
{
- pgd_t *pgd;
- unsigned long next;
unsigned long end = addr + size;
struct mm_struct *mm = vma->vm_mm;
int err;
BUG_ON(addr >= end);
- pgd = pgd_offset(mm, addr);
flush_cache_range(vma, addr, end);
- do {
- next = pgd_addr_end(addr, end);
- err = zeromap_pud_range(mm, pgd, addr, next, prot);
- if (err)
- break;
- } while (pgd++, addr = next, addr != end);
+ err = zeromap_build_iterator(mm, addr, end, prot);
return err;
}
Index: linux-2.6.17.2/mm/pt-default.c
===================================================================
--- linux-2.6.17.2.orig/mm/pt-default.c 2006-07-08 20:38:57.812521328 +1000
+++ linux-2.6.17.2/mm/pt-default.c 2006-07-08 20:44:58.658664376 +1000
@@ -486,3 +486,149 @@
return addr;
}
+
+static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, unsigned long end, pgprot_t prot)
+{
+ pte_t *pte;
+ spinlock_t *ptl;
+
+ pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
+ if (!pte)
+ return -ENOMEM;
+ do {
+ zeromap_one_pte(mm, pte, addr, prot);
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ pte_unmap_unlock(pte - 1, ptl);
+ return 0;
+}
+
+static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
+ unsigned long addr, unsigned long end, pgprot_t prot)
+{
+ pmd_t *pmd;
+ unsigned long next;
+
+ pmd = pmd_alloc(mm, pud, addr);
+ if (!pmd)
+ return -ENOMEM;
+ do {
+ next = pmd_addr_end(addr, end);
+ if (zeromap_pte_range(mm, pmd, addr, next, prot))
+ return -ENOMEM;
+ } while (pmd++, addr = next, addr != end);
+ return 0;
+}
+
+static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
+ unsigned long addr, unsigned long end, pgprot_t prot)
+{
+ pud_t *pud;
+ unsigned long next;
+
+ pud = pud_alloc(mm, pgd, addr);
+ if (!pud)
+ return -ENOMEM;
+ do {
+ next = pud_addr_end(addr, end);
+ if (zeromap_pmd_range(mm, pud, addr, next, prot))
+ return -ENOMEM;
+ } while (pud++, addr = next, addr != end);
+ return 0;
+}
+
+int zeromap_build_iterator(struct mm_struct *mm,
+ unsigned long addr, unsigned long end, pgprot_t prot)
+{
+ unsigned long next;
+ pgd_t *pgd;
+
+ pgd = pgd_offset(mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (zeromap_pud_range(mm, pgd, addr, next, prot))
+ return -ENOMEM;
+ } while (pgd++, addr = next, addr != end);
+ return 0;
+}
+
+/*
+ * maps a range of physical memory into the requested pages. the old
+ * mappings are removed. any references to nonexistent pages results
+ * in null mappings (currently treated as "copy-on-access")
+ */
+static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, unsigned long end,
+ unsigned long pfn, pgprot_t prot)
+{
+ pte_t *pte;
+ spinlock_t *ptl;
+
+ pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
+ if (!pte)
+ return -ENOMEM;
+ do {
+ remap_one_pte(mm, pte, addr, pfn++, prot);
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ pte_unmap_unlock(pte - 1, ptl);
+ return 0;
+}
+
+static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
+ unsigned long addr, unsigned long end,
+ unsigned long pfn, pgprot_t prot)
+{
+ pmd_t *pmd;
+ unsigned long next;
+
+ pfn -= addr >> PAGE_SHIFT;
+ pmd = pmd_alloc(mm, pud, addr);
+ if (!pmd)
+ return -ENOMEM;
+ do {
+ next = pmd_addr_end(addr, end);
+ if (remap_pte_range(mm, pmd, addr, next,
+ pfn + (addr >> PAGE_SHIFT), prot))
+ return -ENOMEM;
+ } while (pmd++, addr = next, addr != end);
+ return 0;
+}
+
+static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
+ unsigned long addr, unsigned long end,
+ unsigned long pfn, pgprot_t prot)
+{
+ pud_t *pud;
+ unsigned long next;
+
+ pfn -= addr >> PAGE_SHIFT;
+ pud = pud_alloc(mm, pgd, addr);
+ if (!pud)
+ return -ENOMEM;
+ do {
+ next = pud_addr_end(addr, end);
+ if (remap_pmd_range(mm, pud, addr, next,
+ pfn + (addr >> PAGE_SHIFT), prot))
+ return -ENOMEM;
+ } while (pud++, addr = next, addr != end);
+ return 0;
+}
+
+int remap_build_iterator(struct mm_struct *mm,
+ unsigned long addr, unsigned long end, unsigned long pfn,
+ pgprot_t prot)
+{
+ pgd_t *pgd;
+ unsigned long next;
+ int err;
+
+ pgd = pgd_offset(mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ err = remap_pud_range(mm, pgd, addr, next,
+ pfn + (addr >> PAGE_SHIFT), prot);
+ if (err)
+ break;
+ } while (pgd++, addr = next, addr != end);
+ return err;
+}
--
* [PATCH 13/18] PTI - Msync iterator abstraction
2006-07-13 4:26 [PATCH 0/18] PTI - Explanation Paul Davies
` (11 preceding siblings ...)
2006-07-13 4:28 ` [PATCH 12/18] PTI - Zeromap iterator abstraction Paul Davies
@ 2006-07-13 4:28 ` Paul Davies
2006-07-13 4:29 ` [PATCH 14/18] PTI - Vmalloc iterator abstractions Paul Davies
` (4 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Paul Davies @ 2006-07-13 4:28 UTC (permalink / raw)
To: linux-mm; +Cc: Paul Davies
1) Abstracts the msync iterator from msync.c to pt-default.c.
2) Abstracts the remap walk out of remap_pfn_range in memory.c; it now calls remap_build_iterator.
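msync_page_range() keeps its hugetlb early-out and cache flush and defers
the walk to the read iterator; a sketch of the result (the hugetlb check is
written with is_vm_hugetlb_page() here, which is equivalent to the flag
test in the existing source):

static unsigned long msync_page_range(struct vm_area_struct *vma,
                                      unsigned long addr, unsigned long end)
{
        /* hugetlbfs is memory based, so there is nothing to sync */
        if (is_vm_hugetlb_page(vma))
                return 0;

        BUG_ON(addr >= end);
        flush_cache_range(vma, addr, end);
        /* Per-pte work is msync_one_pte(); the walk is msync_read_iterator(). */
        return msync_read_iterator(vma, addr, end);
}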
Signed-Off-By: Paul Davies <pauld@gelato.unsw.edu.au>
---
memory.c | 76 +++-------------------------------------------
msync.c | 96 +++++++++--------------------------------------------------
pt-default.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 102 insertions(+), 151 deletions(-)
Index: linux-2.6.17.2/mm/memory.c
===================================================================
--- linux-2.6.17.2.orig/mm/memory.c 2006-07-08 20:46:15.496234144 +1000
+++ linux-2.6.17.2/mm/memory.c 2006-07-08 20:48:44.426434024 +1000
@@ -740,76 +740,17 @@
}
EXPORT_SYMBOL(vm_insert_page);
-/*
- * maps a range of physical memory into the requested pages. the old
- * mappings are removed. any references to nonexistent pages results
- * in null mappings (currently treated as "copy-on-access")
- */
-static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
- unsigned long addr, unsigned long end,
- unsigned long pfn, pgprot_t prot)
+void remap_one_pte(struct mm_struct *mm, pte_t *pte, unsigned long addr,
+ unsigned long pfn, pgprot_t prot)
{
- pte_t *pte;
- spinlock_t *ptl;
-
- pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
- if (!pte)
- return -ENOMEM;
- do {
- BUG_ON(!pte_none(*pte));
- set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
- pfn++;
- } while (pte++, addr += PAGE_SIZE, addr != end);
- pte_unmap_unlock(pte - 1, ptl);
- return 0;
-}
-
-static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
- unsigned long addr, unsigned long end,
- unsigned long pfn, pgprot_t prot)
-{
- pmd_t *pmd;
- unsigned long next;
-
- pfn -= addr >> PAGE_SHIFT;
- pmd = pmd_alloc(mm, pud, addr);
- if (!pmd)
- return -ENOMEM;
- do {
- next = pmd_addr_end(addr, end);
- if (remap_pte_range(mm, pmd, addr, next,
- pfn + (addr >> PAGE_SHIFT), prot))
- return -ENOMEM;
- } while (pmd++, addr = next, addr != end);
- return 0;
-}
-
-static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
- unsigned long addr, unsigned long end,
- unsigned long pfn, pgprot_t prot)
-{
- pud_t *pud;
- unsigned long next;
-
- pfn -= addr >> PAGE_SHIFT;
- pud = pud_alloc(mm, pgd, addr);
- if (!pud)
- return -ENOMEM;
- do {
- next = pud_addr_end(addr, end);
- if (remap_pmd_range(mm, pud, addr, next,
- pfn + (addr >> PAGE_SHIFT), prot))
- return -ENOMEM;
- } while (pud++, addr = next, addr != end);
- return 0;
+ BUG_ON(!pte_none(*pte));
+ set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
}
/* Note: this is only safe if the mm semaphore is held when called. */
int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn, unsigned long size, pgprot_t prot)
{
- pgd_t *pgd;
- unsigned long next;
unsigned long end = addr + PAGE_ALIGN(size);
struct mm_struct *mm = vma->vm_mm;
int err;
@@ -842,15 +783,8 @@
BUG_ON(addr >= end);
pfn -= addr >> PAGE_SHIFT;
- pgd = pgd_offset(mm, addr);
flush_cache_range(vma, addr, end);
- do {
- next = pgd_addr_end(addr, end);
- err = remap_pud_range(mm, pgd, addr, next,
- pfn + (addr >> PAGE_SHIFT), prot);
- if (err)
- break;
- } while (pgd++, addr = next, addr != end);
+ err = remap_build_iterator(mm, addr, end, pfn, prot);
return err;
}
EXPORT_SYMBOL(remap_pfn_range);
Index: linux-2.6.17.2/mm/msync.c
===================================================================
--- linux-2.6.17.2.orig/mm/msync.c 2006-06-30 10:17:23.000000000 +1000
+++ linux-2.6.17.2/mm/msync.c 2006-07-08 20:51:18.519008392 +1000
@@ -16,89 +16,32 @@
#include <linux/writeback.h>
#include <linux/file.h>
#include <linux/syscalls.h>
+#include <linux/pt.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
-static unsigned long msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long addr, unsigned long end)
-{
- pte_t *pte;
- spinlock_t *ptl;
- int progress = 0;
- unsigned long ret = 0;
-
-again:
- pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
- do {
- struct page *page;
-
- if (progress >= 64) {
- progress = 0;
- if (need_resched() || need_lockbreak(ptl))
- break;
- }
- progress++;
- if (!pte_present(*pte))
- continue;
- if (!pte_maybe_dirty(*pte))
- continue;
- page = vm_normal_page(vma, addr, *pte);
- if (!page)
- continue;
- if (ptep_clear_flush_dirty(vma, addr, pte) ||
- page_test_and_clear_dirty(page))
- ret += set_page_dirty(page);
- progress += 3;
- } while (pte++, addr += PAGE_SIZE, addr != end);
- pte_unmap_unlock(pte - 1, ptl);
- cond_resched();
- if (addr != end)
- goto again;
- return ret;
-}
-
-static inline unsigned long msync_pmd_range(struct vm_area_struct *vma,
- pud_t *pud, unsigned long addr, unsigned long end)
+int msync_one_pte(pte_t *pte, unsigned long address,
+ struct vm_area_struct *vma, unsigned long *ret)
{
- pmd_t *pmd;
- unsigned long next;
- unsigned long ret = 0;
+ struct page *page;
- pmd = pmd_offset(pud, addr);
- do {
- next = pmd_addr_end(addr, end);
- if (pmd_none_or_clear_bad(pmd))
- continue;
- ret += msync_pte_range(vma, pmd, addr, next);
- } while (pmd++, addr = next, addr != end);
- return ret;
-}
-
-static inline unsigned long msync_pud_range(struct vm_area_struct *vma,
- pgd_t *pgd, unsigned long addr, unsigned long end)
-{
- pud_t *pud;
- unsigned long next;
- unsigned long ret = 0;
-
- pud = pud_offset(pgd, addr);
- do {
- next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud))
- continue;
- ret += msync_pmd_range(vma, pud, addr, next);
- } while (pud++, addr = next, addr != end);
- return ret;
+ if (!pte_present(*pte))
+ return 0;
+ if (!pte_maybe_dirty(*pte))
+ return 0;
+ page = vm_normal_page(vma, address, *pte);
+ if (!page)
+ return 0;
+ if (ptep_clear_flush_dirty(vma, address, pte) ||
+ page_test_and_clear_dirty(page))
+ *ret += set_page_dirty(page);
+ return 1;
}
static unsigned long msync_page_range(struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
- pgd_t *pgd;
- unsigned long next;
- unsigned long ret = 0;
-
/* For hugepages we can't go walking the page table normally,
* but that's ok, hugetlbfs is memory based, so we don't need
* to do anything more on an msync().
@@ -107,15 +50,8 @@
return 0;
BUG_ON(addr >= end);
- pgd = pgd_offset(vma->vm_mm, addr);
flush_cache_range(vma, addr, end);
- do {
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
- continue;
- ret += msync_pud_range(vma, pgd, addr, next);
- } while (pgd++, addr = next, addr != end);
- return ret;
+ return msync_read_iterator(vma, addr, end);
}
/*
Index: linux-2.6.17.2/mm/pt-default.c
===================================================================
--- linux-2.6.17.2.orig/mm/pt-default.c 2006-07-08 20:54:42.425009968 +1000
+++ linux-2.6.17.2/mm/pt-default.c 2006-07-08 20:55:53.911142424 +1000
@@ -632,3 +632,84 @@
} while (pgd++, addr = next, addr != end);
return 0;
}
+
+static unsigned long msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+ unsigned long addr, unsigned long end)
+{
+ pte_t *pte;
+ spinlock_t *ptl;
+ int progress = 0;
+ unsigned long ret = 0;
+
+again:
+ pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+ do {
+ if (progress >= 64) {
+ progress = 0;
+ if (need_resched() || need_lockbreak(ptl))
+ break;
+ }
+ progress++;
+ if (!msync_one_pte(pte, addr, vma, &ret))
+ continue;
+ progress += 3;
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ pte_unmap_unlock(pte - 1, ptl);
+ cond_resched();
+ if (addr != end)
+ goto again;
+ return ret;
+}
+
+static inline unsigned long msync_pmd_range(struct vm_area_struct *vma,
+ pud_t *pud, unsigned long addr, unsigned long end)
+{
+ pmd_t *pmd;
+ unsigned long next;
+ unsigned long ret = 0;
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
+ ret += msync_pte_range(vma, pmd, addr, next);
+ } while (pmd++, addr = next, addr != end);
+ return ret;
+}
+
+static inline unsigned long msync_pud_range(struct vm_area_struct *vma,
+ pgd_t *pgd, unsigned long addr, unsigned long end)
+{
+ pud_t *pud;
+ unsigned long next;
+ unsigned long ret = 0;
+
+ pud = pud_offset(pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
+ ret += msync_pmd_range(vma, pud, addr, next);
+ } while (pud++, addr = next, addr != end);
+ return ret;
+}
+
+unsigned long msync_read_iterator(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ pgd_t *pgd;
+ unsigned long next;
+ unsigned long ret = 0;
+
+ pgd = pgd_offset(mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
+ ret += msync_pud_range(vma, pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
+ return ret;
+}
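The iterator added above must satisfy a simple contract: visit every mapped pte in [addr, end), hand it to msync_one_pte() (which stays in msync.c), and return the count of pages newly marked dirty that msync_one_pte() accumulates through its ret pointer. A rough sketch of what a hypothetical alternative page table implementation would have to supply (not part of this patch; my_pte_lookup() is an invented helper and pte locking is elided for brevity):

        unsigned long msync_read_iterator(struct vm_area_struct *vma,
                        unsigned long addr, unsigned long end)
        {
                unsigned long ret = 0;
                pte_t *pte;

                for (; addr != end; addr += PAGE_SIZE) {
                        pte = my_pte_lookup(vma->vm_mm, addr);  /* hypothetical lookup */
                        if (pte)
                                msync_one_pte(pte, addr, vma, &ret);    /* hook kept in msync.c */
                }
                return ret;     /* pages newly marked dirty */
        }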
* [PATCH 14/18] PTI - Vmalloc iterators abstractions
2006-07-13 4:26 [PATCH 0/18] PTI - Explanation Paul Davies
` (12 preceding siblings ...)
2006-07-13 4:28 ` [PATCH 13/18] PTI - Msync " Paul Davies
@ 2006-07-13 4:29 ` Paul Davies
2006-07-13 4:29 ` [PATCH 15/18] PTI - Change protection iterator abstraction Paul Davies
` (3 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Paul Davies @ 2006-07-13 4:29 UTC (permalink / raw)
To: linux-mm; +Cc: Paul Davies
1) Abstracts the vmalloc build iterator from vmalloc.c into pt-default.c
2) Abstracts the vmalloc read iterator from vmalloc.c into pt-default.c (see the sketch below)
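After this change vmalloc.c keeps only the per-pte operations vunmap_one_pte() and vmap_one_pte(); the configured page table implementation supplies the iterators that apply them across [addr, end) of the kernel page table. A rough sketch of what an alternative implementation would provide (not part of this patch; my_kernel_pte() and my_kernel_pte_alloc() are invented helpers):

        void vunmap_read_iterator(unsigned long addr, unsigned long end)
        {
                pte_t *pte;

                for (; addr != end; addr += PAGE_SIZE) {
                        pte = my_kernel_pte(&init_mm, addr);    /* hypothetical lookup */
                        if (pte)
                                vunmap_one_pte(pte, addr);      /* hook kept in vmalloc.c */
                }
        }

        int vmap_build_iterator(unsigned long addr, unsigned long end,
                        pgprot_t prot, struct page ***pages)
        {
                pte_t *pte;
                int err;

                for (; addr != end; addr += PAGE_SIZE) {
                        pte = my_kernel_pte_alloc(&init_mm, addr);      /* hypothetical */
                        if (!pte)
                                return -ENOMEM;
                        err = vmap_one_pte(pte, addr, pages, prot);     /* hook kept in vmalloc.c */
                        if (err)
                                return err;
                }
                return 0;
        }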
Signed-Off-By: Paul Davies <pauld@gelato.unsw.edu.au>
---
pt-default.c | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
vmalloc.c | 116 ++++++-------------------------------------------------
2 files changed, 136 insertions(+), 103 deletions(-)
Index: linux-2.6.17.2/mm/vmalloc.c
===================================================================
--- linux-2.6.17.2.orig/mm/vmalloc.c 2006-06-30 10:17:23.000000000 +1000
+++ linux-2.6.17.2/mm/vmalloc.c 2006-07-08 21:01:06.193668264 +1000
@@ -16,6 +16,7 @@
#include <linux/interrupt.h>
#include <linux/vmalloc.h>
+#include <linux/pt.h>
#include <asm/uaccess.h>
#include <asm/tlbflush.h>
@@ -24,135 +25,44 @@
DEFINE_RWLOCK(vmlist_lock);
struct vm_struct *vmlist;
-static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
+void vunmap_one_pte(pte_t *pte, unsigned long address)
{
- pte_t *pte;
-
- pte = pte_offset_kernel(pmd, addr);
- do {
- pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
- WARN_ON(!pte_none(ptent) && !pte_present(ptent));
- } while (pte++, addr += PAGE_SIZE, addr != end);
-}
-
-static inline void vunmap_pmd_range(pud_t *pud, unsigned long addr,
- unsigned long end)
-{
- pmd_t *pmd;
- unsigned long next;
-
- pmd = pmd_offset(pud, addr);
- do {
- next = pmd_addr_end(addr, end);
- if (pmd_none_or_clear_bad(pmd))
- continue;
- vunmap_pte_range(pmd, addr, next);
- } while (pmd++, addr = next, addr != end);
-}
-
-static inline void vunmap_pud_range(pgd_t *pgd, unsigned long addr,
- unsigned long end)
-{
- pud_t *pud;
- unsigned long next;
-
- pud = pud_offset(pgd, addr);
- do {
- next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud))
- continue;
- vunmap_pmd_range(pud, addr, next);
- } while (pud++, addr = next, addr != end);
+ pte_t ptent = ptep_get_and_clear(&init_mm, address, pte);
+ WARN_ON(!pte_none(ptent) && !pte_present(ptent));
}
void unmap_vm_area(struct vm_struct *area)
{
- pgd_t *pgd;
- unsigned long next;
unsigned long addr = (unsigned long) area->addr;
unsigned long end = addr + area->size;
BUG_ON(addr >= end);
- pgd = pgd_offset_k(addr);
flush_cache_vunmap(addr, end);
- do {
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
- continue;
- vunmap_pud_range(pgd, addr, next);
- } while (pgd++, addr = next, addr != end);
+ vunmap_read_iterator(addr, end);
flush_tlb_kernel_range((unsigned long) area->addr, end);
}
-static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
- unsigned long end, pgprot_t prot, struct page ***pages)
-{
- pte_t *pte;
-
- pte = pte_alloc_kernel(pmd, addr);
- if (!pte)
- return -ENOMEM;
- do {
- struct page *page = **pages;
- WARN_ON(!pte_none(*pte));
- if (!page)
- return -ENOMEM;
- set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
- (*pages)++;
- } while (pte++, addr += PAGE_SIZE, addr != end);
- return 0;
-}
-
-static inline int vmap_pmd_range(pud_t *pud, unsigned long addr,
- unsigned long end, pgprot_t prot, struct page ***pages)
-{
- pmd_t *pmd;
- unsigned long next;
-
- pmd = pmd_alloc(&init_mm, pud, addr);
- if (!pmd)
- return -ENOMEM;
- do {
- next = pmd_addr_end(addr, end);
- if (vmap_pte_range(pmd, addr, next, prot, pages))
- return -ENOMEM;
- } while (pmd++, addr = next, addr != end);
- return 0;
-}
-
-static inline int vmap_pud_range(pgd_t *pgd, unsigned long addr,
- unsigned long end, pgprot_t prot, struct page ***pages)
+int vmap_one_pte(pte_t *pte, unsigned long addr,
+ struct page ***pages, pgprot_t prot)
{
- pud_t *pud;
- unsigned long next;
+ struct page *page = **pages;
- pud = pud_alloc(&init_mm, pgd, addr);
- if (!pud)
+ WARN_ON(!pte_none(*pte));
+ if (!page)
return -ENOMEM;
- do {
- next = pud_addr_end(addr, end);
- if (vmap_pmd_range(pud, addr, next, prot, pages))
- return -ENOMEM;
- } while (pud++, addr = next, addr != end);
+ set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
+ (*pages)++;
return 0;
}
int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
{
- pgd_t *pgd;
- unsigned long next;
unsigned long addr = (unsigned long) area->addr;
unsigned long end = addr + area->size - PAGE_SIZE;
int err;
BUG_ON(addr >= end);
- pgd = pgd_offset_k(addr);
- do {
- next = pgd_addr_end(addr, end);
- err = vmap_pud_range(pgd, addr, next, prot, pages);
- if (err)
- break;
- } while (pgd++, addr = next, addr != end);
+ err = vmap_build_iterator(addr, end, prot, pages);
flush_cache_vmap((unsigned long) area->addr, end);
return err;
}
Index: linux-2.6.17.2/mm/pt-default.c
===================================================================
--- linux-2.6.17.2.orig/mm/pt-default.c 2006-07-08 20:56:52.652212424 +1000
+++ linux-2.6.17.2/mm/pt-default.c 2006-07-08 21:02:46.797374176 +1000
@@ -713,3 +713,126 @@
} while (pgd++, addr = next, addr != end);
return ret;
}
+
+static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
+{
+ pte_t *pte;
+
+ pte = pte_offset_kernel(pmd, addr);
+ do {
+ vunmap_one_pte(pte, addr);
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+}
+
+static inline void vunmap_pmd_range(pud_t *pud, unsigned long addr,
+ unsigned long end)
+{
+ pmd_t *pmd;
+ unsigned long next;
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
+ vunmap_pte_range(pmd, addr, next);
+ } while (pmd++, addr = next, addr != end);
+}
+
+static inline void vunmap_pud_range(pgd_t *pgd, unsigned long addr,
+ unsigned long end)
+{
+ pud_t *pud;
+ unsigned long next;
+
+ pud = pud_offset(pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
+ vunmap_pmd_range(pud, addr, next);
+ } while (pud++, addr = next, addr != end);
+}
+
+void vunmap_read_iterator(unsigned long addr, unsigned long end)
+{
+ pgd_t *pgd;
+ unsigned long next;
+
+ pgd = pgd_offset_k(addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
+ vunmap_pud_range(pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
+}
+
+
+static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
+ unsigned long end, pgprot_t prot, struct page ***pages)
+{
+ pte_t *pte;
+ int err;
+
+ pte = pte_alloc_kernel(pmd, addr);
+ if (!pte)
+ return -ENOMEM;
+ do {
+ err = vmap_one_pte(pte, addr, pages, prot);
+ if (err)
+ return err;
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ return 0;
+}
+
+static inline int vmap_pmd_range(pud_t *pud, unsigned long addr,
+ unsigned long end, pgprot_t prot, struct page ***pages)
+{
+ pmd_t *pmd;
+ unsigned long next;
+
+ pmd = pmd_alloc(&init_mm, pud, addr);
+ if (!pmd)
+ return -ENOMEM;
+ do {
+ next = pmd_addr_end(addr, end);
+ if (vmap_pte_range(pmd, addr, next, prot, pages))
+ return -ENOMEM;
+ } while (pmd++, addr = next, addr != end);
+ return 0;
+}
+
+static inline int vmap_pud_range(pgd_t *pgd, unsigned long addr,
+ unsigned long end, pgprot_t prot, struct page ***pages)
+{
+ pud_t *pud;
+ unsigned long next;
+
+ pud = pud_alloc(&init_mm, pgd, addr);
+ if (!pud)
+ return -ENOMEM;
+ do {
+ next = pud_addr_end(addr, end);
+ if (vmap_pmd_range(pud, addr, next, prot, pages))
+ return -ENOMEM;
+ } while (pud++, addr = next, addr != end);
+ return 0;
+}
+
+int vmap_build_iterator(unsigned long addr,
+ unsigned long end, pgprot_t prot, struct page ***pages)
+{
+ pgd_t *pgd;
+ unsigned long next;
+ int err;
+
+ pgd = pgd_offset_k(addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ err = vmap_pud_range(pgd, addr, next, prot, pages);
+ if (err)
+ break;
+ } while (pgd++, addr = next, addr != end);
+ return err;
+}
* [PATCH 15/18] PTI - Change protection iterator abstraction
2006-07-13 4:26 [PATCH 0/18] PTI - Explanation Paul Davies
` (13 preceding siblings ...)
2006-07-13 4:29 ` [PATCH 14/18] PTI - Vmalloc iterators abstractions Paul Davies
@ 2006-07-13 4:29 ` Paul Davies
2006-07-13 4:29 ` [PATCH 16/18] PTI - Mremap " Paul Davies
` (2 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Paul Davies @ 2006-07-13 4:29 UTC (permalink / raw)
To: linux-mm; +Cc: Paul Davies
1) Abstracts the change protection iterator from mprotect.c and
puts it in pt-default.c (see the sketch below)
2) Begins abstracting the smaps iterator from fs/proc/task_mmu.c
into pt-default.c (completed in the next patch)
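mprotect.c now retains only the per-pte hook change_prot_pte(), and fs/proc/task_mmu.c retains only smaps_one_pte(); walking the range, taking the pte lock and calling those hooks becomes the page table implementation's job. A rough sketch of the change-protection side for a hypothetical alternative implementation (not part of this patch; my_pte_lookup_locked() and my_pte_unlock() are invented helpers):

        void change_protection_read_iterator(struct vm_area_struct *vma,
                        unsigned long addr, unsigned long end, pgprot_t newprot)
        {
                struct mm_struct *mm = vma->vm_mm;
                spinlock_t *ptl;
                pte_t *pte;

                for (; addr != end; addr += PAGE_SIZE) {
                        /* the implementation is responsible for locked pte access */
                        pte = my_pte_lookup_locked(mm, addr, &ptl);     /* hypothetical */
                        if (!pte)
                                continue;
                        change_prot_pte(mm, pte, addr, newprot);        /* hook kept in mprotect.c */
                        my_pte_unlock(pte, ptl);                        /* hypothetical */
                }
        }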
Signed-Off-By: Paul Davies <pauld@gelato.unsw.edu.au>
---
fs/proc/task_mmu.c | 105 ++++++++++++-----------------------------------------
mm/mprotect.c | 73 +++++++-----------------------------
mm/pt-default.c | 77 ++++++++++++++++++++++++++++++++++++++
3 files changed, 116 insertions(+), 139 deletions(-)
Index: linux-2.6.17.2/mm/mprotect.c
===================================================================
--- linux-2.6.17.2.orig/mm/mprotect.c 2006-07-09 01:41:14.098069592 +1000
+++ linux-2.6.17.2/mm/mprotect.c 2006-07-09 01:41:21.556935672 +1000
@@ -19,82 +19,37 @@
#include <linux/mempolicy.h>
#include <linux/personality.h>
#include <linux/syscalls.h>
+#include <linux/pt.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
-static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
- unsigned long addr, unsigned long end, pgprot_t newprot)
-{
- pte_t *pte;
- spinlock_t *ptl;
-
- pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
- do {
- if (pte_present(*pte)) {
- pte_t ptent;
-
- /* Avoid an SMP race with hardware updated dirty/clean
- * bits by wiping the pte and then setting the new pte
- * into place.
- */
- ptent = pte_modify(ptep_get_and_clear(mm, addr, pte), newprot);
- set_pte_at(mm, addr, pte, ptent);
- lazy_mmu_prot_update(ptent);
- }
- } while (pte++, addr += PAGE_SIZE, addr != end);
- pte_unmap_unlock(pte - 1, ptl);
-}
-
-static inline void change_pmd_range(struct mm_struct *mm, pud_t *pud,
- unsigned long addr, unsigned long end, pgprot_t newprot)
+void change_prot_pte(struct mm_struct *mm, pte_t *pte,
+ unsigned long address, pgprot_t newprot)
{
- pmd_t *pmd;
- unsigned long next;
+ if (pte_present(*pte)) {
+ pte_t ptent;
- pmd = pmd_offset(pud, addr);
- do {
- next = pmd_addr_end(addr, end);
- if (pmd_none_or_clear_bad(pmd))
- continue;
- change_pte_range(mm, pmd, addr, next, newprot);
- } while (pmd++, addr = next, addr != end);
-}
-
-static inline void change_pud_range(struct mm_struct *mm, pgd_t *pgd,
- unsigned long addr, unsigned long end, pgprot_t newprot)
-{
- pud_t *pud;
- unsigned long next;
-
- pud = pud_offset(pgd, addr);
- do {
- next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud))
- continue;
- change_pmd_range(mm, pud, addr, next, newprot);
- } while (pud++, addr = next, addr != end);
+ /* Avoid an SMP race with hardware updated dirty/clean
+ * bits by wiping the pte and then setting the new pte
+ * into place.
+ */
+ ptent = pte_modify(ptep_get_and_clear(mm, address, pte), newprot);
+ set_pte_at(mm, address, pte, ptent);
+ lazy_mmu_prot_update(ptent);
+ }
}
static void change_protection(struct vm_area_struct *vma,
unsigned long addr, unsigned long end, pgprot_t newprot)
{
- struct mm_struct *mm = vma->vm_mm;
- pgd_t *pgd;
- unsigned long next;
unsigned long start = addr;
BUG_ON(addr >= end);
- pgd = pgd_offset(mm, addr);
flush_cache_range(vma, addr, end);
- do {
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
- continue;
- change_pud_range(mm, pgd, addr, next, newprot);
- } while (pgd++, addr = next, addr != end);
+ change_protection_read_iterator(vma, addr, end, newprot);
flush_tlb_range(vma, start, end);
}
Index: linux-2.6.17.2/mm/pt-default.c
===================================================================
--- linux-2.6.17.2.orig/mm/pt-default.c 2006-07-09 01:41:14.098069592 +1000
+++ linux-2.6.17.2/mm/pt-default.c 2006-07-09 01:43:23.620379208 +1000
@@ -836,3 +836,80 @@
} while (pgd++, addr = next, addr != end);
return 0;
}
+
+static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, unsigned long end, pgprot_t newprot)
+{
+ pte_t *pte;
+ spinlock_t *ptl;
+
+ pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+ do {
+ change_prot_pte(mm, pte, addr, newprot);
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ pte_unmap_unlock(pte - 1, ptl);
+}
+
+static inline void change_pmd_range(struct mm_struct *mm, pud_t *pud,
+ unsigned long addr, unsigned long end, pgprot_t newprot)
+{
+ pmd_t *pmd;
+ unsigned long next;
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
+ change_pte_range(mm, pmd, addr, next, newprot);
+ } while (pmd++, addr = next, addr != end);
+}
+
+static inline void change_pud_range(struct mm_struct *mm, pgd_t *pgd,
+ unsigned long addr, unsigned long end, pgprot_t newprot)
+{
+ pud_t *pud;
+ unsigned long next;
+
+ pud = pud_offset(pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
+ change_pmd_range(mm, pud, addr, next, newprot);
+ } while (pud++, addr = next, addr != end);
+}
+
+void change_protection_read_iterator(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end, pgprot_t newprot)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ pgd_t *pgd;
+ unsigned long next;
+
+ pgd = pgd_offset(mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
+ change_pud_range(mm, pgd, addr, next, newprot);
+ } while (pgd++, addr = next, addr != end);
+}
+
+static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+ unsigned long addr, unsigned long end,
+ struct mem_size_stats *mss)
+{
+ pte_t *pte;
+ spinlock_t *ptl;
+
+ pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+ do {
+ smaps_one_pte(vma, addr, pte, mss);
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ pte_unmap_unlock(pte - 1, ptl);
+ cond_resched();
+}
+
Index: linux-2.6.17.2/fs/proc/task_mmu.c
===================================================================
--- linux-2.6.17.2.orig/fs/proc/task_mmu.c 2006-07-09 01:41:14.099069440 +1000
+++ linux-2.6.17.2/fs/proc/task_mmu.c 2006-07-09 01:41:21.557935520 +1000
@@ -190,88 +190,33 @@
return show_map_internal(m, v, NULL);
}
-static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long addr, unsigned long end,
- struct mem_size_stats *mss)
+void smaps_one_pte(struct vm_area_struct *vma, unsigned long addr, pte_t *pte,
+ struct mem_size_stats *mss)
{
- pte_t *pte, ptent;
- spinlock_t *ptl;
+ pte_t ptent;
struct page *page;
- pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
- do {
- ptent = *pte;
- if (!pte_present(ptent))
- continue;
-
- mss->resident += PAGE_SIZE;
-
- page = vm_normal_page(vma, addr, ptent);
- if (!page)
- continue;
-
- if (page_mapcount(page) >= 2) {
- if (pte_dirty(ptent))
- mss->shared_dirty += PAGE_SIZE;
- else
- mss->shared_clean += PAGE_SIZE;
- } else {
- if (pte_dirty(ptent))
- mss->private_dirty += PAGE_SIZE;
- else
- mss->private_clean += PAGE_SIZE;
- }
- } while (pte++, addr += PAGE_SIZE, addr != end);
- pte_unmap_unlock(pte - 1, ptl);
- cond_resched();
-}
-
-static inline void smaps_pmd_range(struct vm_area_struct *vma, pud_t *pud,
- unsigned long addr, unsigned long end,
- struct mem_size_stats *mss)
-{
- pmd_t *pmd;
- unsigned long next;
-
- pmd = pmd_offset(pud, addr);
- do {
- next = pmd_addr_end(addr, end);
- if (pmd_none_or_clear_bad(pmd))
- continue;
- smaps_pte_range(vma, pmd, addr, next, mss);
- } while (pmd++, addr = next, addr != end);
-}
-
-static inline void smaps_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
- unsigned long addr, unsigned long end,
- struct mem_size_stats *mss)
-{
- pud_t *pud;
- unsigned long next;
-
- pud = pud_offset(pgd, addr);
- do {
- next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud))
- continue;
- smaps_pmd_range(vma, pud, addr, next, mss);
- } while (pud++, addr = next, addr != end);
-}
-
-static inline void smaps_pgd_range(struct vm_area_struct *vma,
- unsigned long addr, unsigned long end,
- struct mem_size_stats *mss)
-{
- pgd_t *pgd;
- unsigned long next;
-
- pgd = pgd_offset(vma->vm_mm, addr);
- do {
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
- continue;
- smaps_pud_range(vma, pgd, addr, next, mss);
- } while (pgd++, addr = next, addr != end);
+ ptent = *pte;
+ if (!pte_present(ptent))
+ return;
+
+ mss->resident += PAGE_SIZE;
+
+ page = vm_normal_page(vma, addr, ptent);
+ if (!page)
+ return;
+
+ if (page_mapcount(page) >= 2) {
+ if (pte_dirty(ptent))
+ mss->shared_dirty += PAGE_SIZE;
+ else
+ mss->shared_clean += PAGE_SIZE;
+ } else {
+ if (pte_dirty(ptent))
+ mss->private_dirty += PAGE_SIZE;
+ else
+ mss->private_clean += PAGE_SIZE;
+ }
}
static int show_smap(struct seq_file *m, void *v)
@@ -281,7 +226,7 @@
memset(&mss, 0, sizeof mss);
if (vma->vm_mm && !is_vm_hugetlb_page(vma))
- smaps_pgd_range(vma, vma->vm_start, vma->vm_end, &mss);
+ smaps_read_range(vma, vma->vm_start, vma->vm_end, &mss);
return show_map_internal(m, v, &mss);
}
* [PATCH 16/18] PTI - Mremap iterator abstraction
2006-07-13 4:26 [PATCH 0/18] PTI - Explanation Paul Davies
` (14 preceding siblings ...)
2006-07-13 4:29 ` [PATCH 15/18] PTI - Change protection iterator abstraction Paul Davies
@ 2006-07-13 4:29 ` Paul Davies
2006-07-13 4:29 ` [PATCH 17/18] PTI - Swapfile " Paul Davies
2006-07-13 4:29 ` [PATCH 18/18] PTI - Mempolicy " Paul Davies
17 siblings, 0 replies; 19+ messages in thread
From: Paul Davies @ 2006-07-13 4:29 UTC (permalink / raw)
To: linux-mm; +Cc: Paul Davies
1) Abstracts the mremap iterator from mremap.c and
puts it in pt-default.c (see the sketch below)
2) Finishes abstracting the smaps iterator begun in the previous
patch by adding the remaining smaps walkers to pt-default.c
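move_page_tables() now uses the page table interface calls lookup_pmd() and build_pmd() in place of the removed get_old_pmd()/alloc_new_pmd() helpers. In the default implementation these are presumably equivalent to the code deleted below; as a sketch of the assumed semantics only:

        pmd_t *lookup_pmd(struct mm_struct *mm, unsigned long addr)
        {
                pgd_t *pgd = pgd_offset(mm, addr);
                pud_t *pud;
                pmd_t *pmd;

                if (pgd_none_or_clear_bad(pgd))
                        return NULL;
                pud = pud_offset(pgd, addr);
                if (pud_none_or_clear_bad(pud))
                        return NULL;
                pmd = pmd_offset(pud, addr);
                if (pmd_none_or_clear_bad(pmd))
                        return NULL;
                return pmd;     /* same walk as the removed get_old_pmd() */
        }

        pmd_t *build_pmd(struct mm_struct *mm, unsigned long addr)
        {
                pgd_t *pgd = pgd_offset(mm, addr);
                pud_t *pud = pud_alloc(mm, pgd, addr);
                pmd_t *pmd;

                if (!pud)
                        return NULL;
                pmd = pmd_alloc(mm, pud, addr);
                if (!pmd)
                        return NULL;
                if (!pmd_present(*pmd) && __pte_alloc(mm, pmd, addr))
                        return NULL;
                return pmd;     /* same behaviour as the removed alloc_new_pmd() */
        }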
Signed-Off-By: Paul Davies <pauld@gelato.unsw.edu.au>
---
mremap.c | 136 ++++-------------------------------------------------------
pt-default.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 140 insertions(+), 125 deletions(-)
Index: linux-2.6.17.2/mm/mremap.c
===================================================================
--- linux-2.6.17.2.orig/mm/mremap.c 2006-07-09 01:42:40.596919768 +1000
+++ linux-2.6.17.2/mm/mremap.c 2006-07-09 01:43:41.324687744 +1000
@@ -18,139 +18,25 @@
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/syscalls.h>
+#include <linux/pt.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
-static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
+void mremap_move_pte(struct vm_area_struct *vma,
+ struct vm_area_struct *new_vma, pte_t *old_pte, pte_t *new_pte,
+ unsigned long old_addr, unsigned long new_addr)
{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
-
- pgd = pgd_offset(mm, addr);
- if (pgd_none_or_clear_bad(pgd))
- return NULL;
-
- pud = pud_offset(pgd, addr);
- if (pud_none_or_clear_bad(pud))
- return NULL;
-
- pmd = pmd_offset(pud, addr);
- if (pmd_none_or_clear_bad(pmd))
- return NULL;
+ pte_t pte;
- return pmd;
-}
-
-static pmd_t *alloc_new_pmd(struct mm_struct *mm, unsigned long addr)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
-
- pgd = pgd_offset(mm, addr);
- pud = pud_alloc(mm, pgd, addr);
- if (!pud)
- return NULL;
-
- pmd = pmd_alloc(mm, pud, addr);
- if (!pmd)
- return NULL;
-
- if (!pmd_present(*pmd) && __pte_alloc(mm, pmd, addr))
- return NULL;
-
- return pmd;
-}
-
-static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
- unsigned long old_addr, unsigned long old_end,
- struct vm_area_struct *new_vma, pmd_t *new_pmd,
- unsigned long new_addr)
-{
- struct address_space *mapping = NULL;
- struct mm_struct *mm = vma->vm_mm;
- pte_t *old_pte, *new_pte, pte;
- spinlock_t *old_ptl, *new_ptl;
-
- if (vma->vm_file) {
- /*
- * Subtle point from Rajesh Venkatasubramanian: before
- * moving file-based ptes, we must lock vmtruncate out,
- * since it might clean the dst vma before the src vma,
- * and we propagate stale pages into the dst afterward.
- */
- mapping = vma->vm_file->f_mapping;
- spin_lock(&mapping->i_mmap_lock);
- if (new_vma->vm_truncate_count &&
- new_vma->vm_truncate_count != vma->vm_truncate_count)
- new_vma->vm_truncate_count = 0;
- }
-
- /*
- * We don't have to worry about the ordering of src and dst
- * pte locks because exclusive mmap_sem prevents deadlock.
- */
- old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
- new_pte = pte_offset_map_nested(new_pmd, new_addr);
- new_ptl = pte_lockptr(mm, new_pmd);
- if (new_ptl != old_ptl)
- spin_lock(new_ptl);
-
- for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
- new_pte++, new_addr += PAGE_SIZE) {
- if (pte_none(*old_pte))
- continue;
- pte = ptep_clear_flush(vma, old_addr, old_pte);
- /* ZERO_PAGE can be dependant on virtual addr */
- pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
- set_pte_at(mm, new_addr, new_pte, pte);
- }
-
- if (new_ptl != old_ptl)
- spin_unlock(new_ptl);
- pte_unmap_nested(new_pte - 1);
- pte_unmap_unlock(old_pte - 1, old_ptl);
- if (mapping)
- spin_unlock(&mapping->i_mmap_lock);
-}
-
-#define LATENCY_LIMIT (64 * PAGE_SIZE)
-
-static unsigned long move_page_tables(struct vm_area_struct *vma,
- unsigned long old_addr, struct vm_area_struct *new_vma,
- unsigned long new_addr, unsigned long len)
-{
- unsigned long extent, next, old_end;
- pmd_t *old_pmd, *new_pmd;
-
- old_end = old_addr + len;
- flush_cache_range(vma, old_addr, old_end);
-
- for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
- cond_resched();
- next = (old_addr + PMD_SIZE) & PMD_MASK;
- if (next - 1 > old_end)
- next = old_end;
- extent = next - old_addr;
- old_pmd = get_old_pmd(vma->vm_mm, old_addr);
- if (!old_pmd)
- continue;
- new_pmd = alloc_new_pmd(vma->vm_mm, new_addr);
- if (!new_pmd)
- break;
- next = (new_addr + PMD_SIZE) & PMD_MASK;
- if (extent > next - new_addr)
- extent = next - new_addr;
- if (extent > LATENCY_LIMIT)
- extent = LATENCY_LIMIT;
- move_ptes(vma, old_pmd, old_addr, old_addr + extent,
- new_vma, new_pmd, new_addr);
- }
+ if (pte_none(*old_pte))
+ return;
- return len + old_addr - old_end; /* how much done */
+ pte = ptep_clear_flush(vma, old_addr, old_pte);
+ /* ZERO_PAGE can be dependent on virtual addr */
+ pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
+ set_pte_at(vma->vm_mm, new_addr, new_pte, pte);
}
static unsigned long move_vma(struct vm_area_struct *vma,
Index: linux-2.6.17.2/mm/pt-default.c
===================================================================
--- linux-2.6.17.2.orig/mm/pt-default.c 2006-07-09 01:43:23.620379208 +1000
+++ linux-2.6.17.2/mm/pt-default.c 2006-07-09 01:43:52.910926368 +1000
@@ -913,3 +913,132 @@
cond_resched();
}
+static inline void smaps_pmd_range(struct vm_area_struct *vma, pud_t *pud,
+ unsigned long addr, unsigned long end,
+ struct mem_size_stats *mss)
+{
+ pmd_t *pmd;
+ unsigned long next;
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
+ smaps_pte_range(vma, pmd, addr, next, mss);
+ } while (pmd++, addr = next, addr != end);
+}
+
+static inline void smaps_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+ unsigned long addr, unsigned long end,
+ struct mem_size_stats *mss)
+{
+ pud_t *pud;
+ unsigned long next;
+
+ pud = pud_offset(pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
+ smaps_pmd_range(vma, pud, addr, next, mss);
+ } while (pud++, addr = next, addr != end);
+}
+
+void smaps_read_range(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end,
+ struct mem_size_stats *mss)
+{
+ pgd_t *pgd;
+ unsigned long next;
+
+ pgd = pgd_offset(vma->vm_mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
+ smaps_pud_range(vma, pgd, addr, next, mss);
+ } while (pgd++, addr = next, addr != end);
+}
+
+#define MREMAP_LATENCY_LIMIT (64 * PAGE_SIZE)
+
+static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
+ unsigned long old_addr, unsigned long old_end,
+ struct vm_area_struct *new_vma, pmd_t *new_pmd,
+ unsigned long new_addr)
+{
+ struct address_space *mapping = NULL;
+ struct mm_struct *mm = vma->vm_mm;
+ pte_t *old_pte, *new_pte;
+ spinlock_t *old_ptl, *new_ptl;
+
+ if (vma->vm_file) {
+ /*
+ * Subtle point from Rajesh Venkatasubramanian: before
+ * moving file-based ptes, we must lock vmtruncate out,
+ * since it might clean the dst vma before the src vma,
+ * and we propagate stale pages into the dst afterward.
+ */
+ mapping = vma->vm_file->f_mapping;
+ spin_lock(&mapping->i_mmap_lock);
+ if (new_vma->vm_truncate_count &&
+ new_vma->vm_truncate_count != vma->vm_truncate_count)
+ new_vma->vm_truncate_count = 0;
+ }
+
+ /*
+ * We don't have to worry about the ordering of src and dst
+ * pte locks because exclusive mmap_sem prevents deadlock.
+ */
+ old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
+ new_pte = pte_offset_map_nested(new_pmd, new_addr);
+ new_ptl = pte_lockptr(mm, new_pmd);
+ if (new_ptl != old_ptl)
+ spin_lock(new_ptl);
+
+ for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
+ new_pte++, new_addr += PAGE_SIZE)
+ mremap_move_pte(vma, new_vma, old_pte, new_pte, old_addr, new_addr);
+
+ if (new_ptl != old_ptl)
+ spin_unlock(new_ptl);
+ pte_unmap_nested(new_pte - 1);
+ pte_unmap_unlock(old_pte - 1, old_ptl);
+ if (mapping)
+ spin_unlock(&mapping->i_mmap_lock);
+}
+
+unsigned long move_page_tables(struct vm_area_struct *vma,
+ unsigned long old_addr, struct vm_area_struct *new_vma,
+ unsigned long new_addr, unsigned long len)
+{
+ unsigned long extent, next, old_end;
+ pmd_t *old_pmd, *new_pmd;
+
+ old_end = old_addr + len;
+ flush_cache_range(vma, old_addr, old_end);
+
+ for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
+ cond_resched();
+ next = (old_addr + PMD_SIZE) & PMD_MASK;
+ if (next - 1 > old_end)
+ next = old_end;
+ extent = next - old_addr;
+ old_pmd = lookup_pmd(vma->vm_mm, old_addr);
+ if (!old_pmd)
+ continue;
+ new_pmd = build_pmd(vma->vm_mm, new_addr);
+ if (!new_pmd)
+ break;
+ next = (new_addr + PMD_SIZE) & PMD_MASK;
+ if (extent > next - new_addr)
+ extent = next - new_addr;
+ if (extent > MREMAP_LATENCY_LIMIT)
+ extent = MREMAP_LATENCY_LIMIT;
+ move_ptes(vma, old_pmd, old_addr, old_addr + extent,
+ new_vma, new_pmd, new_addr);
+ }
+
+ return len + old_addr - old_end; /* how much done */
+}
* [PATCH 17/18] PTI - Swapfile iterator abstraction
2006-07-13 4:26 [PATCH 0/18] PTI - Explanation Paul Davies
` (15 preceding siblings ...)
2006-07-13 4:29 ` [PATCH 16/18] PTI - Mremap " Paul Davies
@ 2006-07-13 4:29 ` Paul Davies
2006-07-13 4:29 ` [PATCH 18/18] PTI - Mempolicy " Paul Davies
17 siblings, 0 replies; 19+ messages in thread
From: Paul Davies @ 2006-07-13 4:29 UTC (permalink / raw)
To: linux-mm; +Cc: Paul Davies
Abstracts the swapfile iterator from swapfile.c and
puts it in pt-default.c
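unuse_pte() stays in swapfile.c and is now exported to the page table implementation, whose unuse_vma_read_iterator() must scan [addr, end) for a pte equal to the swap entry, call unuse_pte() on it under the pte lock, and return 1 on a hit and 0 otherwise. Roughly, for a hypothetical alternative implementation (not part of this patch; my_pte_lookup_locked() and my_pte_unlock() are invented helpers):

        int unuse_vma_read_iterator(struct vm_area_struct *vma,
                        unsigned long addr, unsigned long end,
                        swp_entry_t entry, struct page *page)
        {
                pte_t swp_pte = swp_entry_to_pte(entry);
                spinlock_t *ptl;
                pte_t *pte;

                for (; addr != end; addr += PAGE_SIZE) {
                        pte = my_pte_lookup_locked(vma->vm_mm, addr, &ptl);     /* hypothetical */
                        if (!pte)
                                continue;
                        if (unlikely(pte_same(*pte, swp_pte))) {
                                unuse_pte(vma, pte, addr, entry, page); /* hook kept in swapfile.c */
                                my_pte_unlock(pte, ptl);                /* hypothetical */
                                return 1;
                        }
                        my_pte_unlock(pte, ptl);                        /* hypothetical */
                }
                return 0;
        }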
Signed-Off-By: Paul Davies <pauld@gelato.unsw.edu.au>
---
pt-default.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
swapfile.c | 77 ++-------------------------------------------------------
2 files changed, 83 insertions(+), 73 deletions(-)
Index: linux-2.6.17.2/mm/swapfile.c
===================================================================
--- linux-2.6.17.2.orig/mm/swapfile.c 2006-06-30 10:17:23.000000000 +1000
+++ linux-2.6.17.2/mm/swapfile.c 2006-07-08 22:00:07.309931488 +1000
@@ -28,6 +28,7 @@
#include <linux/mutex.h>
#include <linux/capability.h>
#include <linux/syscalls.h>
+#include <linux/pt.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -483,7 +484,7 @@
* just let do_wp_page work it out if a write is requested later - to
* force COW, vm_page_prot omits write permission from any private vma.
*/
-static void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
+void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
unsigned long addr, swp_entry_t entry, struct page *page)
{
inc_mm_counter(vma->vm_mm, anon_rss);
@@ -499,72 +500,10 @@
activate_page(page);
}
-static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long addr, unsigned long end,
- swp_entry_t entry, struct page *page)
-{
- pte_t swp_pte = swp_entry_to_pte(entry);
- pte_t *pte;
- spinlock_t *ptl;
- int found = 0;
-
- pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
- do {
- /*
- * swapoff spends a _lot_ of time in this loop!
- * Test inline before going to call unuse_pte.
- */
- if (unlikely(pte_same(*pte, swp_pte))) {
- unuse_pte(vma, pte++, addr, entry, page);
- found = 1;
- break;
- }
- } while (pte++, addr += PAGE_SIZE, addr != end);
- pte_unmap_unlock(pte - 1, ptl);
- return found;
-}
-
-static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
- unsigned long addr, unsigned long end,
- swp_entry_t entry, struct page *page)
-{
- pmd_t *pmd;
- unsigned long next;
-
- pmd = pmd_offset(pud, addr);
- do {
- next = pmd_addr_end(addr, end);
- if (pmd_none_or_clear_bad(pmd))
- continue;
- if (unuse_pte_range(vma, pmd, addr, next, entry, page))
- return 1;
- } while (pmd++, addr = next, addr != end);
- return 0;
-}
-
-static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
- unsigned long addr, unsigned long end,
- swp_entry_t entry, struct page *page)
-{
- pud_t *pud;
- unsigned long next;
-
- pud = pud_offset(pgd, addr);
- do {
- next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud))
- continue;
- if (unuse_pmd_range(vma, pud, addr, next, entry, page))
- return 1;
- } while (pud++, addr = next, addr != end);
- return 0;
-}
-
static int unuse_vma(struct vm_area_struct *vma,
swp_entry_t entry, struct page *page)
{
- pgd_t *pgd;
- unsigned long addr, end, next;
+ unsigned long addr, end;
if (page->mapping) {
addr = page_address_in_vma(page, vma);
@@ -577,15 +516,7 @@
end = vma->vm_end;
}
- pgd = pgd_offset(vma->vm_mm, addr);
- do {
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
- continue;
- if (unuse_pud_range(vma, pgd, addr, next, entry, page))
- return 1;
- } while (pgd++, addr = next, addr != end);
- return 0;
+ return unuse_vma_read_iterator(vma, addr, end, entry, page);
}
static int unuse_mm(struct mm_struct *mm,
Index: linux-2.6.17.2/mm/pt-default.c
===================================================================
--- linux-2.6.17.2.orig/mm/pt-default.c 2006-07-08 21:53:14.552151216 +1000
+++ linux-2.6.17.2/mm/pt-default.c 2006-07-08 22:01:25.216087952 +1000
@@ -1042,3 +1042,82 @@
return len + old_addr - old_end; /* how much done */
}
+
+static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+ unsigned long addr, unsigned long end,
+ swp_entry_t entry, struct page *page)
+{
+ pte_t swp_pte = swp_entry_to_pte(entry);
+ pte_t *pte;
+ spinlock_t *ptl;
+ int found = 0;
+
+ pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+ do {
+ /*
+ * swapoff spends a _lot_ of time in this loop!
+ * Test inline before going to call unuse_pte.
+ */
+ if (unlikely(pte_same(*pte, swp_pte))) {
+ unuse_pte(vma, pte++, addr, entry, page);
+ found = 1;
+ break;
+ }
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ pte_unmap_unlock(pte - 1, ptl);
+ return found;
+}
+
+static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
+ unsigned long addr, unsigned long end,
+ swp_entry_t entry, struct page *page)
+{
+ pmd_t *pmd;
+ unsigned long next;
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
+ if (unuse_pte_range(vma, pmd, addr, next, entry, page))
+ return 1;
+ } while (pmd++, addr = next, addr != end);
+ return 0;
+}
+
+static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+ unsigned long addr, unsigned long end,
+ swp_entry_t entry, struct page *page)
+{
+ pud_t *pud;
+ unsigned long next;
+
+ pud = pud_offset(pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
+ if (unuse_pmd_range(vma, pud, addr, next, entry, page))
+ return 1;
+ } while (pud++, addr = next, addr != end);
+ return 0;
+}
+
+int unuse_vma_read_iterator(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end, swp_entry_t entry,
+ struct page *page)
+{
+ pgd_t *pgd;
+ unsigned long next;
+
+ pgd = pgd_offset(vma->vm_mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
+ if (unuse_pud_range(vma, pgd, addr, next, entry, page))
+ return 1;
+ } while (pgd++, addr = next, addr != end);
+ return 0;
+}
* [PATCH 18/18] PTI - Mempolicy iterator abstraction
2006-07-13 4:26 [PATCH 0/18] PTI - Explanation Paul Davies
` (16 preceding siblings ...)
2006-07-13 4:29 ` [PATCH 17/18] PTI - Swapfile " Paul Davies
@ 2006-07-13 4:29 ` Paul Davies
17 siblings, 0 replies; 19+ messages in thread
From: Paul Davies @ 2006-07-13 4:29 UTC (permalink / raw)
To: linux-mm; +Cc: Paul Davies
Abstracts the mempolicy iterator from mempolicy.c and
puts it in pt-default.c
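The policy checks themselves stay in mempolicy.c as mempolicy_check_one_pte(); the page table implementation's check_policy_read_iterator() applies that hook to each mapped pte and returns -EIO as soon as the hook reports a violation. Roughly, for a hypothetical alternative implementation (not part of this patch; my_pte_lookup_locked() and my_pte_unlock() are invented helpers):

        int check_policy_read_iterator(struct vm_area_struct *vma,
                        unsigned long addr, unsigned long end,
                        const nodemask_t *nodes, unsigned long flags, void *private)
        {
                spinlock_t *ptl;
                pte_t *pte;

                for (; addr != end; addr += PAGE_SIZE) {
                        pte = my_pte_lookup_locked(vma->vm_mm, addr, &ptl);     /* hypothetical */
                        if (!pte)
                                continue;
                        if (mempolicy_check_one_pte(vma, addr, pte, nodes,
                                                    flags, private)) {
                                my_pte_unlock(pte, ptl);                /* hypothetical */
                                return -EIO;    /* hook kept in mempolicy.c reported a violation */
                        }
                        my_pte_unlock(pte, ptl);                        /* hypothetical */
                }
                return 0;
        }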
Signed-Off-By: Paul Davies <pauld@gelato.unsw.edu.au>
---
mempolicy.c | 140 +++++++++++++++--------------------------------------------
pt-default.c | 83 ++++++++++++++++++++++++++++++++++
2 files changed, 120 insertions(+), 103 deletions(-)
Index: linux-2.6.17.2/mm/mempolicy.c
===================================================================
--- linux-2.6.17.2.orig/mm/mempolicy.c 2006-06-30 10:17:23.000000000 +1000
+++ linux-2.6.17.2/mm/mempolicy.c 2006-07-08 22:04:39.561542952 +1000
@@ -87,6 +87,7 @@
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/migrate.h>
+#include <linux/pt.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
@@ -199,111 +200,44 @@
static void migrate_page_add(struct page *page, struct list_head *pagelist,
unsigned long flags);
-/* Scan through pages checking if pages follow certain conditions. */
-static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long addr, unsigned long end,
- const nodemask_t *nodes, unsigned long flags,
- void *private)
-{
- pte_t *orig_pte;
- pte_t *pte;
- spinlock_t *ptl;
-
- orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
- do {
- struct page *page;
- unsigned int nid;
-
- if (!pte_present(*pte))
- continue;
- page = vm_normal_page(vma, addr, *pte);
- if (!page)
- continue;
- /*
- * The check for PageReserved here is important to avoid
- * handling zero pages and other pages that may have been
- * marked special by the system.
- *
- * If the PageReserved would not be checked here then f.e.
- * the location of the zero page could have an influence
- * on MPOL_MF_STRICT, zero pages would be counted for
- * the per node stats, and there would be useless attempts
- * to put zero pages on the migration list.
- */
- if (PageReserved(page))
- continue;
- nid = page_to_nid(page);
- if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
- continue;
-
- if (flags & MPOL_MF_STATS)
- gather_stats(page, private, pte_dirty(*pte));
- else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
- migrate_page_add(page, private, flags);
- else
- break;
- } while (pte++, addr += PAGE_SIZE, addr != end);
- pte_unmap_unlock(orig_pte, ptl);
- return addr != end;
-}
-
-static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
- unsigned long addr, unsigned long end,
- const nodemask_t *nodes, unsigned long flags,
- void *private)
-{
- pmd_t *pmd;
- unsigned long next;
-
- pmd = pmd_offset(pud, addr);
- do {
- next = pmd_addr_end(addr, end);
- if (pmd_none_or_clear_bad(pmd))
- continue;
- if (check_pte_range(vma, pmd, addr, next, nodes,
- flags, private))
- return -EIO;
- } while (pmd++, addr = next, addr != end);
- return 0;
-}
-
-static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
- unsigned long addr, unsigned long end,
- const nodemask_t *nodes, unsigned long flags,
- void *private)
+int mempolicy_check_one_pte(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *pte, const nodemask_t *nodes, unsigned long flags,
+ void *private)
{
- pud_t *pud;
- unsigned long next;
+ struct page *page;
+ unsigned int nid;
- pud = pud_offset(pgd, addr);
- do {
- next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud))
- continue;
- if (check_pmd_range(vma, pud, addr, next, nodes,
- flags, private))
- return -EIO;
- } while (pud++, addr = next, addr != end);
- return 0;
-}
+ if (!pte_present(*pte))
+ return 0;
+ page = vm_normal_page(vma, addr, *pte);
+ if (!page)
+ return 0;
+ /*
+ * The check for PageReserved here is important to avoid
+ * handling zero pages and other pages that may have been
+ * marked special by the system.
+ *
+ * If the PageReserved would not be checked here then f.e.
+ * the location of the zero page could have an influence
+ * on MPOL_MF_STRICT, zero pages would be counted for
+ * the per node stats, and there would be useless attempts
+ * to put zero pages on the migration list.
+ */
+ if (PageReserved(page))
+ return 0;
+ nid = page_to_nid(page);
+ if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
+ return 0;
-static inline int check_pgd_range(struct vm_area_struct *vma,
- unsigned long addr, unsigned long end,
- const nodemask_t *nodes, unsigned long flags,
- void *private)
-{
- pgd_t *pgd;
- unsigned long next;
+ if (flags & MPOL_MF_STATS)
+ gather_stats(page, private, pte_dirty(*pte));
+ else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+ migrate_page_add(page, private, flags);
+ else
+ return 1;
- pgd = pgd_offset(vma->vm_mm, addr);
- do {
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
- continue;
- if (check_pud_range(vma, pgd, addr, next, nodes,
- flags, private))
- return -EIO;
- } while (pgd++, addr = next, addr != end);
return 0;
}
@@ -356,7 +290,7 @@
endvma = end;
if (vma->vm_start > start)
start = vma->vm_start;
- err = check_pgd_range(vma, start, endvma, nodes,
+ err = check_policy_read_iterator(vma, start, endvma, nodes,
flags, private);
if (err) {
first = ERR_PTR(err);
@@ -1833,7 +1767,7 @@
check_huge_range(vma, vma->vm_start, vma->vm_end, md);
seq_printf(m, " huge");
} else {
- check_pgd_range(vma, vma->vm_start, vma->vm_end,
+ check_policy_read_iterator(vma, vma->vm_start, vma->vm_end,
&node_online_map, MPOL_MF_STATS, md);
}
Index: linux-2.6.17.2/mm/pt-default.c
===================================================================
--- linux-2.6.17.2.orig/mm/pt-default.c 2006-07-08 22:01:25.216087952 +1000
+++ linux-2.6.17.2/mm/pt-default.c 2006-07-08 22:06:07.221216656 +1000
@@ -1121,3 +1121,86 @@
} while (pgd++, addr = next, addr != end);
return 0;
}
+
+#ifdef CONFIG_NUMA
+/* Scan through pages checking if pages follow certain conditions. */
+static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+ unsigned long addr, unsigned long end,
+ const nodemask_t *nodes, unsigned long flags,
+ void *private)
+{
+ pte_t *orig_pte;
+ pte_t *pte;
+ spinlock_t *ptl;
+ int ret;
+
+ orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+ do {
+ ret = mempolicy_check_one_pte(vma, addr, pte, nodes, flags, private);
+ if (ret)
+ break;
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ pte_unmap_unlock(orig_pte, ptl);
+ return addr != end;
+}
+
+static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
+ unsigned long addr, unsigned long end,
+ const nodemask_t *nodes, unsigned long flags,
+ void *private)
+{
+ pmd_t *pmd;
+ unsigned long next;
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
+ if (check_pte_range(vma, pmd, addr, next, nodes,
+ flags, private))
+ return -EIO;
+ } while (pmd++, addr = next, addr != end);
+ return 0;
+}
+
+static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+ unsigned long addr, unsigned long end,
+ const nodemask_t *nodes, unsigned long flags,
+ void *private)
+{
+ pud_t *pud;
+ unsigned long next;
+
+ pud = pud_offset(pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
+ if (check_pmd_range(vma, pud, addr, next, nodes,
+ flags, private))
+ return -EIO;
+ } while (pud++, addr = next, addr != end);
+ return 0;
+}
+
+int check_policy_read_iterator(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end,
+ const nodemask_t *nodes, unsigned long flags,
+ void *private)
+{
+ pgd_t *pgd;
+ unsigned long next;
+
+ pgd = pgd_offset(vma->vm_mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
+ if (check_pud_range(vma, pgd, addr, next, nodes,
+ flags, private))
+ return -EIO;
+ } while (pgd++, addr = next, addr != end);
+ return 0;
+}
+#endif