diff -rup linux-2.5.70-mjb1-banana_split/arch/i386/kernel/vm86.c linux-2.5.70-mjb1-ukva/arch/i386/kernel/vm86.c
--- linux-2.5.70-mjb1-banana_split/arch/i386/kernel/vm86.c	Thu Jun 12 21:37:20 2003
+++ linux-2.5.70-mjb1-ukva/arch/i386/kernel/vm86.c	Fri Jun 13 09:07:47 2003
@@ -152,7 +152,7 @@ static void mark_screen_rdonly(struct ta
 		pmd_clear(pmd);
 		goto out;
 	}
-	pte = mapped = pte_offset_map(pmd, 0xA0000);
+	pte = mapped = pte_offset_map(tsk->mm, pmd, 0xA0000);
 	for (i = 0; i < 32; i++) {
 		if (pte_present(*pte))
 			set_pte(pte, pte_wrprotect(*pte));
diff -rup linux-2.5.70-mjb1-banana_split/arch/i386/mm/init.c linux-2.5.70-mjb1-ukva/arch/i386/mm/init.c
--- linux-2.5.70-mjb1-banana_split/arch/i386/mm/init.c	Thu Jun 12 21:40:03 2003
+++ linux-2.5.70-mjb1-ukva/arch/i386/mm/init.c	Fri Jun 13 10:01:42 2003
@@ -304,6 +304,13 @@ extern void set_highmem_pages_init(int);
 
 unsigned long __PAGE_KERNEL = _PAGE_KERNEL;
 
+/*
+ * The UKVA pages are per-process, so they need to be flushed
+ * just like user pages (flushed by __flush_tlb() instead of
+ * just __flush_tlb_all()).
+ */
+unsigned long __PAGE_UKVA = _PAGE_KERNEL;
+
 #ifndef CONFIG_DISCONTIGMEM
 #define remap_numa_kva() do {} while (0)
 #else
diff -rup linux-2.5.70-mjb1-banana_split/arch/i386/mm/pgtable.c linux-2.5.70-mjb1-ukva/arch/i386/mm/pgtable.c
--- linux-2.5.70-mjb1-banana_split/arch/i386/mm/pgtable.c	Thu Jun 12 22:04:57 2003
+++ linux-2.5.70-mjb1-ukva/arch/i386/mm/pgtable.c	Fri Jun 13 09:44:20 2003
@@ -225,6 +225,66 @@ void pgd_dtor(void *pgd, kmem_cache_t *c
 	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
+/*
+ * There are 4 pages allocated just for mapping the UKVA PTE area.
+ * They provide the virtual space necessary for mapping the 8MB
+ * of other PTEs.
+ *
+ * Of these pages, one is special, because it contains entries
+ * for itself and the other 3 pages.  The UKVA PTE area is aligned
+ * on an 8MB boundary, so that only one of these special pages is
+ * needed and the area doesn't get spread out over more than 1 PMD.
+ * This reduces the number of pages which must be kmapped here.
+ */
+void pmd_alloc_ukva(pmd_t *pmd)
+{
+	/* the self-referential pte page */
+	struct page* recursive_pte_page;
+	pmd_t *ukva_pmd = pmd + pmd_index(UKVA_PTE_START);
+	pte_t *pte_page_kmap;
+	pte_t *pte;
+	int j;
+
+	recursive_pte_page = alloc_pages(GFP_ATOMIC, 0);
+
+	/*
+	 * the recursive page must be kmapped because entries need
+	 * to be made in it, and the page tables that we're working
+	 * on aren't active yet.
+	 */
+	pte_page_kmap = kmap_atomic(recursive_pte_page, KM_PTE0);
+	clear_page(pte_page_kmap);
+
+	pte = &pte_page_kmap[pte_index(__FIRST_UKVA_PTE)];
+	for (j = 0; j < PTRS_PER_PGD; ++j, ukva_pmd++, pte++) {
+		struct page* ukva_page;
+		if (j != pgd_index(UKVA_PTE_START)) {
+			ukva_page = alloc_pages(GFP_ATOMIC, 0);
+			clear_highpage(ukva_page);
+		} else
+			ukva_page = recursive_pte_page;
+		set_pte(pte, mk_pte(ukva_page, PAGE_KERNEL_UKVA));
+		pmd_populate(NULL, ukva_pmd, ukva_page);
+	}
+
+	kunmap_atomic(pte_page_kmap, KM_PTE0);
+}
+
+/*
+ * for simplicity, the UKVA area is aligned so that
+ * it will all be mapped inside a single PMD.
+ */
+void pmd_free_ukva(pmd_t *pmd)
+{
+	pmd_t *ukva_pmd = pmd + pmd_index(UKVA_PTE_START);
+	int j;
+	for (j = 0; j < 4; j++) {
+		clear_highpage(pmd_page(ukva_pmd[j]));
+		__free_pages(pmd_page(ukva_pmd[j]), 0);
+		pmd_clear(&ukva_pmd[j]);
+	}
+}
+
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	int i;
@@ -247,6 +307,9 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 			goto out_oom;
 		/* bleh.  that's ugly, bad wli */
 		set_pgd(&pgd[i], __pgd(1 + __pa((u64)((u32)pmd))));
+
+		if (pgd_index(UKVA_PTE_START) == i)
+			pmd_alloc_ukva(pmd);
 	}
 	return pgd;
 
@@ -274,6 +337,9 @@ void pgd_free(pgd_t *pgd)
 			kmem_cache_free(pmd_cache, pmd_to_free);
 		else if (i == FIRST_KERNEL_PGD_PTR)
 			kmem_cache_free(kernel_pmd_cache, pmd_to_free);
+
+		if (i == pgd_index(UKVA_PTE_START))
+			pmd_free_ukva(pmd_to_free);
 	}
 }
 /* in the non-PAE case, clear_page_tables() clears user pgd entries */
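
A note on the arithmetic behind pmd_alloc_ukva(): with PAE there are 4*512*512 = 1M ptes covering the 4GB address space, at 8 bytes each, hence the 8MB UKVA PTE window; mapping 8MB of virtual space takes 2048 ptes, which occupy 16KB, i.e. exactly the 4 pages allocated above, and the 4 entries describing those pages easily fit inside one of them (the recursive page).  The following user-space sketch is not part of the patch; it just re-derives those numbers from the stock i386 PAE constants.

#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PTE_BYTES	8UL		/* sizeof(pte_t) with PAE */
#define PTRS_PER_PGD	4UL
#define PTRS_PER_PMD	512UL
#define PTRS_PER_PTE	512UL

int main(void)
{
	/* one pte for every page of the 4GB virtual address space */
	unsigned long ptes = PTRS_PER_PGD * PTRS_PER_PMD * PTRS_PER_PTE;
	/* virtual space needed to hold all of those ptes (UKVA_PTE_SIZE) */
	unsigned long window = ptes * PTE_BYTES;
	/* pte pages needed to map that window */
	unsigned long pte_pages = (window / PAGE_SIZE) / PTRS_PER_PTE;

	assert(ptes == (1UL << 20));		/* 1M ptes           */
	assert(window == (8UL << 20));		/* an 8MB window     */
	assert(pte_pages == 4);			/* the 4 pages above */
	/* the 4 entries describing those 4 pages take only 32 bytes, so
	 * they always fit inside one of the 4 pages: the recursive page */
	assert(pte_pages * PTE_BYTES < PAGE_SIZE);

	printf("window %luMB, mapped by %lu pte pages\n",
	       window >> 20, pte_pages);
	return 0;
}
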
diff -rup linux-2.5.70-mjb1-banana_split/include/asm-generic/rmap.h linux-2.5.70-mjb1-ukva/include/asm-generic/rmap.h
--- linux-2.5.70-mjb1-banana_split/include/asm-generic/rmap.h	Thu Jun 12 21:37:18 2003
+++ linux-2.5.70-mjb1-ukva/include/asm-generic/rmap.h	Fri Jun 13 09:07:47 2003
@@ -47,16 +47,36 @@ static inline void pgtable_remove_rmap(s
 	dec_page_state(nr_page_table_pages);
 }
 
+static inline int is_ukva_pte(pte_t *pte)
+{
+	unsigned long pteaddr = (unsigned long)pte;
+	if (pteaddr >= UKVA_PTE_START &&
+	    pteaddr <= UKVA_PTE_END)
+		return 1;
+	return 0;
+}
+
 static inline struct mm_struct * ptep_to_mm(pte_t * ptep)
 {
-	struct page * page = kmap_atomic_to_page(ptep);
+	struct page * page;
+
+	if (is_ukva_pte(ptep))
+		page = pte_page(*ptep);
+	else
+		page = kmap_atomic_to_page(ptep);
 	return (struct mm_struct *) page->mapping;
 }
 
 static inline unsigned long ptep_to_address(pte_t * ptep)
 {
-	struct page * page = kmap_atomic_to_page(ptep);
+	struct page * page;
 	unsigned long low_bits;
+
+	if (is_ukva_pte(ptep))
+		page = pte_page(*ptep);
+	else
+		page = kmap_atomic_to_page(ptep);
+
 	low_bits = ((unsigned long)ptep & ~PAGE_MASK) * PTRS_PER_PTE;
 	return page->index + low_bits;
 }
@@ -64,8 +84,14 @@ static inline unsigned long ptep_to_addr
 #ifdef CONFIG_HIGHPTE
 static inline pte_addr_t ptep_to_paddr(pte_t *ptep)
 {
+	unsigned long pfn;
 	pte_addr_t paddr;
-	paddr = ((pte_addr_t)page_to_pfn(kmap_atomic_to_page(ptep))) << PAGE_SHIFT;
+	if (is_ukva_pte(ptep)) {
+		pfn = pte_pfn(*ukva_pte_offset(ptep));
+	} else {
+		pfn = page_to_pfn(kmap_atomic_to_page(ptep));
+	}
+	paddr = (pte_addr_t)(pfn << PAGE_SHIFT);
 	return paddr + (pte_addr_t)((unsigned long)ptep & ~PAGE_MASK);
 }
 #else
diff -rup linux-2.5.70-mjb1-banana_split/include/asm-i386/mmu_context.h linux-2.5.70-mjb1-ukva/include/asm-i386/mmu_context.h
--- linux-2.5.70-mjb1-banana_split/include/asm-i386/mmu_context.h	Thu Jun 12 21:37:18 2003
+++ linux-2.5.70-mjb1-ukva/include/asm-i386/mmu_context.h	Fri Jun 13 09:07:47 2003
@@ -24,7 +24,7 @@ static inline void enter_lazy_tlb(struct
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			     struct task_struct *tsk, unsigned cpu)
 {
-	if (likely(prev != next)) {
+	if (1 || likely(prev != next)) {
 		/* stop flush ipis for the previous mm */
 		clear_bit(cpu, &prev->cpu_vm_mask);
 #ifdef CONFIG_SMP
diff -rup linux-2.5.70-mjb1-banana_split/include/asm-i386/pgtable-3level.h linux-2.5.70-mjb1-ukva/include/asm-i386/pgtable-3level.h
--- linux-2.5.70-mjb1-banana_split/include/asm-i386/pgtable-3level.h	Thu Jun 12 21:37:19 2003
+++ linux-2.5.70-mjb1-ukva/include/asm-i386/pgtable-3level.h	Fri Jun 13 09:07:47 2003
@@ -53,6 +53,7 @@ static inline void set_pte(pte_t *ptep,
 	set_64bit((unsigned long long *)(pteptr),pte_val(pteval))
 #define set_pmd(pmdptr,pmdval) \
 	set_64bit((unsigned long long *)(pmdptr),pmd_val(pmdval))
+
 #define set_pgd(pgdptr,pgdval) \
 	set_64bit((unsigned long long *)(pgdptr),pgd_val(pgdval))
 
diff -rup linux-2.5.70-mjb1-banana_split/include/asm-i386/pgtable.h linux-2.5.70-mjb1-ukva/include/asm-i386/pgtable.h
--- linux-2.5.70-mjb1-banana_split/include/asm-i386/pgtable.h	Thu Jun 12 21:38:47 2003
+++ linux-2.5.70-mjb1-ukva/include/asm-i386/pgtable.h	Fri Jun 13 10:36:08 2003
@@ -105,7 +105,56 @@ static inline int USER_PTRS_PER_PMD(int
 			~(VMALLOC_OFFSET-1))
 #define VMALLOC_VMADDR(x) ((unsigned long)(x))
 #ifdef CONFIG_HIGHMEM
-# define VMALLOC_END	(PKMAP_BASE-2*PAGE_SIZE)
+# define UKVA_END	(PKMAP_BASE-2*PAGE_SIZE)
+# define UKVA_START	(UKVA_END-(1<<20)*16)
+
+/*
+ * there must be virtual space for enough ptes to map each
+ * of the pagetable pages in the system.
+ *
+ * the physical space which will underlie this virtual space will
+ * be allocated later.
+ *
+ * The start of this area is aligned on an 8MB boundary, which guarantees
+ * that the UKVA PTE pages themselves can be mapped by only 4 ptes.  Those
+ * 4 ptes are in turn guaranteed to be mapped by a single pte.  This also
+ * guarantees that a single PMD page will map it, too.  This makes PMD
+ * allocation easier, and keeps us from kmapping as much during init.
+ */
+# define UKVA_PTE_SIZE	(PTRS_PER_PGD*PTRS_PER_PMD*PTRS_PER_PTE*sizeof(pte_t))
+# define UKVA_PTE_MASK	(~(UKVA_PTE_SIZE-1))
+
+/*
+ * the virtual address of the first and last UKVA PTE.  Note that the END is
+ * not the boundary of the space, but the virtual address of the last one.
+ *
+ * the area between UKVA_START and UKVA_PTE_START is available for other use
+ */
+# define UKVA_PTE_START	((UKVA_START&UKVA_PTE_MASK)+UKVA_PTE_SIZE)
+# define UKVA_PTE_END	(UKVA_PTE_START+UKVA_PTE_SIZE-sizeof(pte_t))
+
+/*
+ * These provide shortcuts to the ptes which map the UKVA PTE area itself
+ */
+# define FIRST_UKVA_PTE	(ukva_pte_offset((void *)UKVA_PTE_START))
+# define LAST_UKVA_PTE	(ukva_pte_offset((void *)UKVA_PTE_END))
+# define __FIRST_UKVA_PTE	((unsigned long)FIRST_UKVA_PTE)
+# define __LAST_UKVA_PTE	((unsigned long)LAST_UKVA_PTE)
+
+static inline unsigned long __ukva_pte_index(void *address)
+{
+	return ((unsigned long)address)>>PAGE_SHIFT;
+}
+
+/*
+ * ukva_pte_offset(address) calculates the UKVA virtual address of the pte
+ * which controls "address".  This doesn't guarantee that there will be
+ * anything there, it just gives the address where it _would_ be.
+ */
+
+#define ukva_pte_offset(address) &((pte_t *)UKVA_PTE_START)[__ukva_pte_index(address)]
+
+# define VMALLOC_END	(UKVA_START-2*PAGE_SIZE)
 #else
 # define VMALLOC_END	(FIXADDR_START-2*PAGE_SIZE)
 #endif
@@ -158,6 +207,7 @@ extern unsigned long __PAGE_KERNEL;
 #define __PAGE_KERNEL_LARGE	(__PAGE_KERNEL | _PAGE_PSE)
 
 #define PAGE_KERNEL		__pgprot(__PAGE_KERNEL)
+#define PAGE_KERNEL_UKVA	__pgprot(_PAGE_KERNEL)
 #define PAGE_KERNEL_RO		__pgprot(__PAGE_KERNEL_RO)
 #define PAGE_KERNEL_NOCACHE	__pgprot(__PAGE_KERNEL_NOCACHE)
 #define PAGE_KERNEL_LARGE	__pgprot(__PAGE_KERNEL_LARGE)
@@ -306,11 +356,34 @@ static inline pte_t pte_modify(pte_t pte
 	((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address))
 
 #if defined(CONFIG_HIGHPTE)
-#define pte_offset_map(dir, address) \
+extern int ukva_hits;
+extern int ukva_misses[];
+/*
+ * There are times when current->mm->pgd is not actually resident in
+ * cr3.  This happens when we're in a transitional state, or doing
+ * some lazy flushing.  I need to figure out what's going on, but
+ * until then, take the cowardly route and fall back to kmap.
+ */
+#define funny_mm() (current->mm != current->active_mm)
+#define pte_offset_map(__mm, dir, address) \
+	(\
+	 (!funny_mm() && (__mm) == current->mm) ? \
+		ukva_pte_offset((void*)address) \
+	 :\
+		__pte_offset_map(dir,address)\
+	)
+#define __pte_offset_map(dir, address) \
 	((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
 #define pte_offset_map_nested(dir, address) \
 	((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE1) + pte_index(address))
-#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
+#define pte_unmap(pte) do { \
+	if ((unsigned long)pte >= UKVA_PTE_START && \
+	    (unsigned long)pte <= UKVA_PTE_END) { \
+		/* it was a ukva pte, no need to unmap */ \
+	} \
+	else \
+		kunmap_atomic(pte, KM_PTE0); \
+} while (0)
 #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
 #else
 #define pte_offset_map(dir, address) \
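
To make the recursion in ukva_pte_offset() concrete, here is a small user-space model (not part of the patch) of the layout macros above.  The PKMAP_BASE value is an assumption (the stock i386 0xff800000UL); the asserted properties only depend on the window being 8MB-sized and 8MB-aligned, not on the exact base.

#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PTE_BYTES	8UL			/* sizeof(pte_t), PAE */
#define PKMAP_BASE	0xff800000UL		/* assumed, see above */

#define UKVA_END	(PKMAP_BASE - 2*PAGE_SIZE)
#define UKVA_START	(UKVA_END - (1UL<<20)*16)
#define UKVA_PTE_SIZE	(4UL*512*512*PTE_BYTES)	/* 8MB */
#define UKVA_PTE_MASK	(~(UKVA_PTE_SIZE-1))
#define UKVA_PTE_START	((UKVA_START & UKVA_PTE_MASK) + UKVA_PTE_SIZE)
#define UKVA_PTE_END	(UKVA_PTE_START + UKVA_PTE_SIZE - PTE_BYTES)

/* mirror of the kernel macro (returns an address rather than a pte_t *) */
static unsigned long ukva_pte_offset(unsigned long v)
{
	return UKVA_PTE_START + (v >> PAGE_SHIFT) * PTE_BYTES;
}

int main(void)
{
	unsigned long first = ukva_pte_offset(UKVA_PTE_START);
	unsigned long last  = ukva_pte_offset(UKVA_PTE_END);

	/* the window is 8MB, and 8MB aligned */
	assert(UKVA_PTE_SIZE == (8UL << 20));
	assert((UKVA_PTE_START & (UKVA_PTE_SIZE - 1)) == 0);

	/* the ptes that map the window land inside the window itself */
	assert(first >= UKVA_PTE_START && last <= UKVA_PTE_END);

	/* ...they cover 16KB, i.e. the 4 pte pages pmd_alloc_ukva() sets up */
	assert(last - first + PTE_BYTES == 4 * PAGE_SIZE);

	/* ...and never straddle a 2MB boundary, so a single pte page (the
	 * "recursive" one) is enough to map all of them */
	assert((first >> 21) == (last >> 21));

	printf("UKVA_PTE_START %#lx, self-map at %#lx-%#lx\n",
	       (unsigned long)UKVA_PTE_START, first, last);
	return 0;
}
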
diff -rup linux-2.5.70-mjb1-banana_split/include/linux/mm.h linux-2.5.70-mjb1-ukva/include/linux/mm.h
--- linux-2.5.70-mjb1-banana_split/include/linux/mm.h	Thu Jun 12 21:37:18 2003
+++ linux-2.5.70-mjb1-ukva/include/linux/mm.h	Fri Jun 13 09:07:47 2003
@@ -414,7 +414,7 @@ void zap_page_range(struct vm_area_struc
 int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
 		struct vm_area_struct *start_vma, unsigned long start_addr,
 		unsigned long end_addr, unsigned long *nr_accounted);
-void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+void unmap_page_range(struct mm_struct *mm, struct mmu_gather *tlb, struct vm_area_struct *vma,
 			unsigned long address, unsigned long size);
 void clear_page_tables(struct mmu_gather *tlb, unsigned long first, int nr);
 int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
@@ -426,6 +426,7 @@ extern int vmtruncate(struct inode * ino
 extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address));
 extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
 extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
+extern pte_t *FASTCALL(__pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address, int shouldwarn));
 extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
 extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
 extern int make_pages_present(unsigned long addr, unsigned long end);
diff -rup linux-2.5.70-mjb1-banana_split/mm/memory.c linux-2.5.70-mjb1-ukva/mm/memory.c
--- linux-2.5.70-mjb1-banana_split/mm/memory.c	Thu Jun 12 21:38:47 2003
+++ linux-2.5.70-mjb1-ukva/mm/memory.c	Fri Jun 13 10:43:07 2003
@@ -149,11 +149,51 @@ void clear_page_tables(struct mmu_gather
 		}
 	} while (--nr);
 }
+/*
+ * Go find the pte which controls "ukva_pte", and point it at "new".
+ *
+ * This is effectively the fallback from UKVA to kmap, in the case
+ * that a pte allocation was requested for an mm which isn't resident.
+ */
+u64 ukva_map_pte_other(struct mm_struct *mm, struct page* new, pte_t *ukva_pte)
+{
+	pgd_t *ukva_pgd;
+	pmd_t *ukva_pmd;
+	pte_t *ukva_mapped_pte;
+	unsigned long ukva_pte_addr = (unsigned long)ukva_pte;
+	u64 ret;
+
+	ukva_pgd = pgd_offset(mm, ukva_pte_addr);
+	ukva_pmd = pmd_offset(ukva_pgd, ukva_pte_addr);
+	/*
+	 * use __pte_offset_map(), so that this will never use the UKVA
+	 * addresses.
+	 */
+	ukva_mapped_pte = __pte_offset_map(ukva_pmd, ukva_pte_addr);
+	set_pte(ukva_mapped_pte, mk_pte(new, PAGE_KERNEL_UKVA));
+	ret = pte_val(*ukva_mapped_pte);
+	pte_unmap(ukva_mapped_pte);
+	return ret;
+}
+
 pte_t * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
 {
+
+	/*
+	 * what is the address of the ukva pte which controls "address"?
+	 * In other words, where will the newly allocated PTE reside
+	 * virtually?
+	 */
+	pte_t *ukva_pte_vaddr = ukva_pte_offset((void*)address);
+	/*
+	 * what is the address of the pte that controls _that_ address?
+	 * This will have to be set before there is anything mapped into the
+	 * ukva_pte_vaddr address.
+	 */
+	pte_t *ukva_pte_controller = ukva_pte_offset(ukva_pte_vaddr);
+
 	if (!pmd_present(*pmd)) {
 		struct page *new;
-
 		spin_unlock(&mm->page_table_lock);
 		new = pte_alloc_one(mm, address);
 		spin_lock(&mm->page_table_lock);
@@ -168,11 +208,32 @@ pte_t * pte_alloc_map(struct mm_struct *
 			pte_free(new);
 			goto out;
 		}
+
+		/*
+		 * If we're running in the mm's context, we can take a shortcut
+		 * to the ukva entries, because they're already mapped.
+		 */
+		if (!funny_mm() && mm == current->mm)
+			set_pte(ukva_pte_controller, mk_pte(new, __pgprot(_PAGE_KERNEL)));
+		else {
+			/*
+			 * If mm isn't the current one, we need to map the
+			 * pte by falling back to kmap().  In addition
+			 * to the kmap()ing, this function updates
+			 * mm's pagetables to have valid ukva information
+			 * for when that mm is active.
+			 */
+			ukva_map_pte_other(mm, new, ukva_pte_vaddr);
+		}
 		pgtable_add_rmap(new, mm, address);
 		pmd_populate(mm, pmd, new);
-	}
+	}
+
 out:
-	return pte_offset_map(pmd, address);
+	if (!funny_mm() && mm == current->mm)
+		return ukva_pte_vaddr;
+	else
+		return __pte_offset_map(pmd, address);
 }
 
 pte_t * pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
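
For reference, this is the shape of a fault-path caller after the interface change, mirroring the do_anonymous_page()/do_no_page() call sites patched further down.  It is illustrative only (example_touch_pte() is not in the patch): pte_offset_map() now needs the mm so it can choose between the UKVA window and kmap_atomic(), and pte_unmap() quietly becomes a no-op for UKVA pointers.

static int example_touch_pte(struct mm_struct *mm, pmd_t *pmd,
			     unsigned long address)
{
	pte_t *page_table;
	int present;

	spin_lock(&mm->page_table_lock);
	page_table = pte_offset_map(mm, pmd, address);	/* ukva or kmap */
	present = pte_present(*page_table);
	/* ... a real fault handler would install a new pte here ... */
	pte_unmap(page_table);		/* no-op if page_table is a ukva pointer */
	spin_unlock(&mm->page_table_lock);
	return present;
}
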
@@ -272,6 +333,7 @@ skip_copy_pmd_range:	address = (address
 		if (pmd_bad(*src_pmd)) {
 			pmd_ERROR(*src_pmd);
 			pmd_clear(src_pmd);
+			BUG();
 skip_copy_pte_range:
 			address = (address + PMD_SIZE) & PMD_MASK;
 			if (address >= end)
@@ -282,8 +344,25 @@ skip_copy_pte_range:
 		dst_pte = pte_alloc_map(dst, dst_pmd, address);
 		if (!dst_pte)
 			goto nomem;
-		spin_lock(&src->page_table_lock);
+		spin_lock(&src->page_table_lock);
+		/*
+		 * copy_page_range() is only called by dup_mmap(), with
+		 * current->mm as its src mm.
+		 *
+		 * This used to be pte_offset_map_nested(), but because of
+		 * ukva, and the fact that src is always current->mm, we
+		 * can take the standard ukva shortcut here.
+		 *
+		 * if src ever becomes !current->mm, this will freak out
+		 * because pte_offset_map() will fall back to kmapping, and
+		 * collide with the above pte_alloc_map().  You could
+		 * always start passing a flag around to pte_alloc_map(), and
+		 * get it to switch to the nested kmap slot for this, if
+		 * you ever hit the bug, which will never happen :P
+		 */
+		BUG_ON(src != current->mm);
 		src_pte = pte_offset_map_nested(src_pmd, address);
+		/* src_pte = pte_offset_map(src, src_pmd, address); */
 		do {
 			pte_t pte = *src_pte;
 			struct page *page;
@@ -356,7 +435,7 @@ skip_copy_pte_range:
 				if (!pte_chain)
 					goto nomem;
 				spin_lock(&src->page_table_lock);
-				dst_pte = pte_offset_map(dst_pmd, address);
+				dst_pte = pte_offset_map(dst, dst_pmd, address);
 				src_pte = pte_offset_map_nested(src_pmd,
 								address);
 cont_copy_pte_range_noset:
@@ -389,7 +468,7 @@ nomem:
 }
 
 static void
-zap_pte_range(struct mmu_gather *tlb, pmd_t * pmd,
+zap_pte_range(struct mm_struct *mm, struct mmu_gather *tlb, pmd_t * pmd,
 		unsigned long address, unsigned long size)
 {
 	unsigned long offset;
@@ -402,7 +481,7 @@ zap_pte_range(struct mmu_gather *tlb, pm
 		pmd_clear(pmd);
 		return;
 	}
-	ptep = pte_offset_map(pmd, address);
+	ptep = pte_offset_map(mm, pmd, address);
 	offset = address & ~PMD_MASK;
 	if (offset + size > PMD_SIZE)
 		size = PMD_SIZE - offset;
@@ -439,7 +518,7 @@ zap_pte_range(struct mmu_gather *tlb, pm
 }
 
 static void
-zap_pmd_range(struct mmu_gather *tlb, pgd_t * dir,
+zap_pmd_range(struct mm_struct *mm, struct mmu_gather *tlb, pgd_t * dir,
 		unsigned long address, unsigned long size)
 {
 	pmd_t * pmd;
@@ -457,13 +536,13 @@ zap_pmd_range(struct mmu_gather *tlb, pg
 	if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
 		end = ((address + PGDIR_SIZE) & PGDIR_MASK);
 	do {
-		zap_pte_range(tlb, pmd, address, end - address);
+		zap_pte_range(mm, tlb, pmd, address, end - address);
 		address = (address + PMD_SIZE) & PMD_MASK;
 		pmd++;
 	} while (address < end);
 }
 
-void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+void unmap_page_range(struct mm_struct *mm, struct mmu_gather *tlb, struct vm_area_struct *vma,
 			unsigned long address, unsigned long end)
 {
 	pgd_t * dir;
@@ -478,7 +557,7 @@ void unmap_page_range(struct mmu_gather
 	dir = pgd_offset(vma->vm_mm, address);
 	tlb_start_vma(tlb, vma);
 	do {
-		zap_pmd_range(tlb, dir, address, end - address);
+		zap_pmd_range(mm, tlb, dir, address, end - address);
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
@@ -572,7 +651,7 @@ int unmap_vmas(struct mmu_gather **tlbp,
 				tlb_start_valid = 1;
 			}
 
-			unmap_page_range(*tlbp, vma, start, start + block);
+			unmap_page_range(mm, *tlbp, vma, start, start + block);
 			start += block;
 			zap_bytes -= block;
 			if ((long)zap_bytes > 0)
@@ -650,7 +729,7 @@ follow_page(struct mm_struct *mm, unsign
 	if (pmd_bad(*pmd))
 		goto out;
 
-	ptep = pte_offset_map(pmd, address);
+	ptep = pte_offset_map(mm, pmd, address);
 	if (!ptep)
 		goto out;
 
@@ -814,7 +893,7 @@ static void zeromap_pte_range(pte_t * pt
 	} while (address && (address < end));
 }
 
-static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address,
+static int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address,
                                     unsigned long size, pgprot_t prot)
 {
 	unsigned long end;
@@ -891,7 +970,7 @@ static inline void remap_pte_range(pte_t
 	} while (address && (address < end));
 }
 
-static inline int remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size,
+static int remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size,
 	unsigned long phys_addr, pgprot_t prot)
 {
 	unsigned long base, end;
@@ -1045,7 +1124,7 @@ static int do_wp_page(struct mm_struct *
 	 * Re-check the pte - we dropped the lock
 	 */
 	spin_lock(&mm->page_table_lock);
-	page_table = pte_offset_map(pmd, address);
+	page_table = pte_offset_map(mm, pmd, address);
 	if (pte_same(*page_table, pte)) {
 		if (PageReserved(old_page))
 			++mm->rss;
@@ -1203,7 +1282,7 @@ static int do_swap_page(struct mm_struct
 		 * we released the page table lock.
 		 */
 		spin_lock(&mm->page_table_lock);
-		page_table = pte_offset_map(pmd, address);
+		page_table = pte_offset_map(mm, pmd, address);
 		if (pte_same(*page_table, orig_pte))
 			ret = VM_FAULT_OOM;
 		else
@@ -1231,7 +1310,7 @@ static int do_swap_page(struct mm_struct
 	 * released the page table lock.
 	 */
 	spin_lock(&mm->page_table_lock);
-	page_table = pte_offset_map(pmd, address);
+	page_table = pte_offset_map(mm, pmd, address);
 	if (!pte_same(*page_table, orig_pte)) {
 		pte_unmap(page_table);
 		spin_unlock(&mm->page_table_lock);
@@ -1290,7 +1369,7 @@ do_anonymous_page(struct mm_struct *mm,
 		if (!pte_chain)
 			goto no_mem;
 		spin_lock(&mm->page_table_lock);
-		page_table = pte_offset_map(pmd, addr);
+		page_table = pte_offset_map(mm, pmd, addr);
 	}
 
 	/* Read-only mapping of ZERO_PAGE. */
@@ -1308,7 +1387,7 @@ do_anonymous_page(struct mm_struct *mm,
 		clear_user_highpage(page, addr);
 
 		spin_lock(&mm->page_table_lock);
-		page_table = pte_offset_map(pmd, addr);
+		page_table = pte_offset_map(mm, pmd, addr);
 
 		if (!pte_none(*page_table)) {
 			pte_unmap(page_table);
@@ -1402,7 +1481,7 @@ do_no_page(struct mm_struct *mm, struct
 	}
 
 	spin_lock(&mm->page_table_lock);
-	page_table = pte_offset_map(pmd, address);
+	page_table = pte_offset_map(mm, pmd, address);
 
 	/*
 	 * This silly early PAGE_DIRTY setting removes a race
@@ -1631,7 +1710,7 @@ struct page * vmalloc_to_page(void * vma
 		pmd = pmd_offset(pgd, addr);
 		if (!pmd_none(*pmd)) {
 			preempt_disable();
-			ptep = pte_offset_map(pmd, addr);
+			ptep = pte_offset_map(&init_mm, pmd, addr);
 			pte = *ptep;
 			if (pte_present(pte))
 				page = pte_page(pte);
diff -rup linux-2.5.70-mjb1-banana_split/mm/mprotect.c linux-2.5.70-mjb1-ukva/mm/mprotect.c
--- linux-2.5.70-mjb1-banana_split/mm/mprotect.c	Thu Jun 12 21:37:33 2003
+++ linux-2.5.70-mjb1-ukva/mm/mprotect.c	Fri Jun 13 10:34:54 2003
@@ -24,8 +24,10 @@
 #include <asm/tlbflush.h>
 
 static inline void
-change_pte_range(pmd_t *pmd, unsigned long address,
-		unsigned long size, pgprot_t newprot)
+change_pte_range(struct mm_struct *mm,
+		pmd_t *pmd, unsigned long address,
+		unsigned long size, pgprot_t newprot,
+		unsigned long realaddress)
 {
 	pte_t * pte;
 	unsigned long end;
@@ -37,7 +39,11 @@ change_pte_range(pmd_t *pmd, unsigned lo
 		pmd_clear(pmd);
 		return;
 	}
-	pte = pte_offset_map(pmd, address);
+	/*
+	 * there is only one path leading here, and it always uses the
+	 * current process's pagetables, so we can use ukva here.
+	 */
+	pte = ukva_pte_offset((void*)realaddress);
 	address &= ~PMD_MASK;
 	end = address + size;
 	if (end > PMD_SIZE)
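
The shortcut above leans on mprotect() only ever touching current->mm, which is exactly the case where the UKVA window and a kmap_atomic() view name the same physical pte.  The sketch below spells out that identity for a single address; it is a hypothetical helper, not part of the patch, and assumes the caller holds mm->page_table_lock.  realaddress is threaded into change_pte_range() separately because address itself gets clipped to a PMD-relative offset for the loop bounds.

static void ukva_check_one_pte(struct mm_struct *mm, pmd_t *pmd,
			       unsigned long address)
{
	pte_t *ukva = ukva_pte_offset((void *)address);
	pte_t *kmapped = __pte_offset_map(pmd, address);

	/* the shortcut is only legal for a live current->mm */
	BUG_ON(mm != current->mm || funny_mm());
	/* two mappings of the same pte page: same contents */
	BUG_ON(pte_val(*ukva) != pte_val(*kmapped));
	pte_unmap(kmapped);
}
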
@@ -60,12 +66,14 @@ change_pte_range(pmd_t *pmd, unsigned lo
 }
 
 static inline void
-change_pmd_range(pgd_t *pgd, unsigned long address,
+change_pmd_range(struct mm_struct *mm, pgd_t *pgd, unsigned long address,
 	unsigned long size, pgprot_t newprot)
 {
 	pmd_t * pmd;
 	unsigned long end;
 
+	unsigned long realaddress = address;
+
 	if (pgd_none(*pgd))
 		return;
 	if (pgd_bad(*pgd)) {
@@ -79,7 +87,7 @@ change_pmd_range(pgd_t *pgd, unsigned lo
 	if (end > PGDIR_SIZE)
 		end = PGDIR_SIZE;
 	do {
-		change_pte_range(pmd, address, end - address, newprot);
+		change_pte_range(mm, pmd, address, end - address, newprot, realaddress);
 		address = (address + PMD_SIZE) & PMD_MASK;
 		pmd++;
 	} while (address && (address < end));
@@ -98,7 +106,7 @@ change_protection(struct vm_area_struct
 		BUG();
 	spin_lock(&current->mm->page_table_lock);
 	do {
-		change_pmd_range(dir, start, end - start, newprot);
+		change_pmd_range(current->mm, dir, start, end - start, newprot);
 		start = (start + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (start && (start < end));
diff -rup linux-2.5.70-mjb1-banana_split/mm/msync.c linux-2.5.70-mjb1-ukva/mm/msync.c
--- linux-2.5.70-mjb1-banana_split/mm/msync.c	Thu Jun 12 21:37:33 2003
+++ linux-2.5.70-mjb1-ukva/mm/msync.c	Fri Jun 13 09:07:47 2003
@@ -53,7 +53,7 @@ static int filemap_sync_pte_range(pmd_t
 		pmd_clear(pmd);
 		return 0;
 	}
-	pte = pte_offset_map(pmd, address);
+	pte = __pte_offset_map(pmd, address);
 	if ((address & PMD_MASK) != (end & PMD_MASK))
 		end = (address & PMD_MASK) + PMD_SIZE;
 	error = 0;
diff -rup linux-2.5.70-mjb1-banana_split/mm/rmap.c linux-2.5.70-mjb1-ukva/mm/rmap.c
--- linux-2.5.70-mjb1-banana_split/mm/rmap.c	Thu Jun 12 21:37:33 2003
+++ linux-2.5.70-mjb1-ukva/mm/rmap.c	Fri Jun 13 12:14:07 2003
@@ -138,7 +138,7 @@ find_pte(struct vm_area_struct *vma, str
 	if (!pmd_present(*pmd))
 		goto out;
 
-	pte = pte_offset_map(pmd, address);
+	pte = __pte_offset_map(pmd, address);
 	if (!pte_present(*pte))
 		goto out_unmap;
 
diff -rup linux-2.5.70-mjb1-banana_split/mm/swapfile.c linux-2.5.70-mjb1-ukva/mm/swapfile.c
--- linux-2.5.70-mjb1-banana_split/mm/swapfile.c	Thu Jun 12 21:37:33 2003
+++ linux-2.5.70-mjb1-ukva/mm/swapfile.c	Fri Jun 13 09:07:47 2003
@@ -406,7 +406,7 @@ static int unuse_pmd(struct vm_area_stru
 		pmd_clear(dir);
 		return 0;
 	}
-	pte = pte_offset_map(dir, address);
+	pte = __pte_offset_map(dir, address);
 	offset += address & PMD_MASK;
 	address &= ~PMD_MASK;
 	end = address + size;