Index: linux-2.6/arch/x86/lib/Makefile_64
===================================================================
--- linux-2.6.orig/arch/x86/lib/Makefile_64
+++ linux-2.6/arch/x86/lib/Makefile_64
@@ -10,4 +10,4 @@ obj-$(CONFIG_SMP) += msr-on-cpu.o
 lib-y := csum-partial_64.o csum-copy_64.o csum-wrappers_64.o delay_64.o \
 	usercopy_64.o getuser_64.o putuser_64.o \
 	thunk_64.o clear_page_64.o copy_page_64.o bitstr_64.o bitops_64.o
-lib-y += memcpy_64.o memmove_64.o memset_64.o copy_user_64.o rwlock_64.o copy_user_nocache_64.o
+lib-y += memcpy_64.o memmove_64.o memset_64.o copy_user_64.o rwlock_64.o copy_user_nocache_64.o gup.o
Index: linux-2.6/arch/x86/lib/gup.c
===================================================================
--- /dev/null
+++ linux-2.6/arch/x86/lib/gup.c
@@ -0,0 +1,135 @@
+/*
+ * Lockless get_user_pages() for x86-64.
+ *
+ * Walk the user page tables with interrupts disabled: so long as page
+ * table pages are freed only after an IPI / deferred flush (as on
+ * x86-64), disabling interrupts keeps them from being freed under us.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <asm/pgtable.h>
+
+/*
+ * Take a reference on every present (and, if @write, writable) page in
+ * [addr, end) mapped by @pmd, storing them at pages[*nr] onward and
+ * advancing *nr.  Returns 1 on success, 0 on the first hole or
+ * permission failure (pages already grabbed stay referenced).
+ */
+static int gup_pte_range(struct mm_struct *mm, pmd_t pmd, unsigned long addr,
+			 unsigned long end, struct page **pages, int *nr,
+			 int write)
+{
+	pte_t *ptep;
+
+	ptep = (pte_t *)pmd_page_vaddr(pmd) + pte_index(addr);
+	do {
+		pte_t pte = *ptep;
+		struct page *page;
+
+		/* pte_none() implies !pte_present(); one test suffices */
+		if (!pte_present(pte))
+			return 0;
+		if (write && !pte_write(pte))
+			return 0;
+
+		page = pte_page(pte);
+		get_page(page);
+		pages[*nr] = page;
+		(*nr)++;
+	} while (ptep++, addr += PAGE_SIZE, addr != end);
+
+	/*
+	 * No pte_unmap() here: the table was reached through
+	 * pmd_page_vaddr(), not pte_offset_map(), so there is no
+	 * mapping to drop (the old pte_unmap(ptep) also pointed one
+	 * entry past the end of the walk).
+	 */
+	return 1;
+}
+
+/*
+ * Walk the pmds under @pud for [addr, end).  Returns 1 on success,
+ * 0 as soon as any level reports a miss.
+ */
+static int gup_pmd_range(struct mm_struct *mm, pud_t pud, unsigned long addr,
+			 unsigned long end, struct page **pages, int *nr,
+			 int write)
+{
+	unsigned long next;
+	pmd_t *pmdp;
+
+	pmdp = (pmd_t *)pud_page_vaddr(pud) + pmd_index(addr);
+	do {
+		pmd_t pmd = *pmdp;
+
+		next = pmd_addr_end(addr, end);
+		if (pmd_none(pmd))
+			return 0;
+		/* XXX: huge pmds are not handled yet */
+		if (!gup_pte_range(mm, pmd, addr, next, pages, nr, write))
+			return 0;
+	} while (pmdp++, addr = next, addr != end);
+
+	return 1;
+}
+
+/*
+ * Walk the puds under @pgd for [addr, end).  Returns 1 on success,
+ * 0 on any miss.  (Was "unsigned long"; it returns only 0/1, so use
+ * int like its siblings.)
+ */
+static int gup_pud_range(struct mm_struct *mm, pgd_t pgd, unsigned long addr,
+			 unsigned long end, struct page **pages, int *nr,
+			 int write)
+{
+	unsigned long next;
+	pud_t *pudp;
+
+	pudp = (pud_t *)pgd_page_vaddr(pgd) + pud_index(addr);
+	do {
+		pud_t pud = *pudp;
+
+		next = pud_addr_end(addr, end);
+		if (pud_none(pud))
+			return 0;
+		if (!gup_pmd_range(mm, pud, addr, next, pages, nr, write))
+			return 0;
+	} while (pudp++, addr = next, addr != end);
+
+	return 1;
+}
+
+/*
+ * fast_gup - pin user pages without taking mmap_sem
+ * @addr:  first user address (page aligned)
+ * @end:   address one past the last byte wanted
+ * @flags: currently unused
+ * @pages: output array; each filled slot holds a referenced page
+ * @nr:    index in @pages to start filling at (callers pass 0)
+ * @write: require write permission on every pte
+ *
+ * Returns the number of slots filled in @pages, stopping early at the
+ * first hole or permission failure.  Caller must put_page() each
+ * returned page.
+ */
+int fast_gup(unsigned long addr, unsigned long end, int flags,
+	     struct page **pages, int nr, int write)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long next;
+	pgd_t *pgdp;
+
+	/* XXX: batch / limit 'nr', to avoid huge latency */
+	/*
+	 * This doesn't prevent pagetable teardown, but does prevent
+	 * the pagetables from being freed on x86-64. XXX: hugepages!
+	 *
+	 * So long as we atomically load page table pointers versus
+	 * teardown (which we do on x86-64), we can follow the address
+	 * down to the page.
+	 */
+	local_irq_disable();
+	pgdp = pgd_offset(mm, addr);
+	do {
+		pgd_t pgd = *pgdp;
+
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(pgd))
+			break;
+		if (!gup_pud_range(mm, pgd, addr, next, pages, &nr, write))
+			break;
+	} while (pgdp++, addr = next, addr != end);
+	local_irq_enable();
+
+	return nr;
+}