 include/linux/mm.h |    2 
 mm/madvise.c       |  150 +++++-------------------------------
 mm/mlock.c         |  158 +++++-------------------------------
 mm/mmap.c          |   37 +++++---
 mm/mprotect.c      |  218 ++++++++++++++++-------------------------------
 5 files changed, 147 insertions(+), 418 deletions(-)

--- 2.5.33/include/linux/mm.h~split-vma	Mon Sep 2 14:05:09 2002
+++ 2.5.33-akpm/include/linux/mm.h	Mon Sep 2 14:05:09 2002
@@ -497,6 +497,8 @@ extern int expand_stack(struct vm_area_s
 extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
 extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
 					     struct vm_area_struct **pprev);
+extern int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
+	unsigned long addr, int new_below);
 
 /* Look up the first VMA which intersects the interval start_addr..end_addr-1,
    NULL if none.  Assume start_addr < end_addr. */
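For readers new to the interface added above: split_vma() cuts one VMA in two at 'addr', and new_below selects which half goes into the newly allocated vm_area_struct. The snippet below is not kernel code and is not part of the patch; it is a small user-space model (the names 'struct area' and 'split_area' are made up) of the start/end/pgoff bookkeeping the real function has to do, in particular that the file offset must advance for whichever piece ends up above the split address.

/*
 * User-space model only; stands in for the vm_area_struct fields involved.
 */
#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT 12

struct area {
	unsigned long start, end, pgoff;	/* pgoff is in pages */
};

/* Split 'old' at 'addr'; 'new_below' says which piece 'new' receives. */
static void split_area(struct area *old, struct area *new,
		       unsigned long addr, int new_below)
{
	*new = *old;
	if (new_below) {
		/* 'new' is the low piece, 'old' keeps [addr, end). */
		new->end = addr;
		old->pgoff += (addr - old->start) >> PAGE_SHIFT;
		old->start = addr;
	} else {
		/* 'new' is the tail, 'old' keeps [start, addr). */
		old->end = addr;
		new->start = addr;
		new->pgoff += (addr - old->start) >> PAGE_SHIFT;
	}
}

int main(void)
{
	struct area a = { 0x10000, 0x20000, 0 }, b;

	split_area(&a, &b, 0x14000, 1);
	assert(b.start == 0x10000 && b.end == 0x14000 && b.pgoff == 0);
	assert(a.start == 0x14000 && a.end == 0x20000 && a.pgoff == 4);
	printf("upper piece now starts at file page %lu\n", a.pgoff);
	return 0;
}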
--- 2.5.33/mm/madvise.c~split-vma	Mon Sep 2 14:05:09 2002
+++ 2.5.33-akpm/mm/madvise.c	Mon Sep 2 14:05:09 2002
@@ -2,17 +2,37 @@
  * linux/mm/madvise.c
  *
  * Copyright (C) 1999  Linus Torvalds
+ * Copyright (C) 2002  Christoph Hellwig
  */
 
-#include 
 #include 
 #include 
-#include 
 
-static inline void setup_read_behavior(struct vm_area_struct * vma,
-	int behavior)
+/*
+ * We can potentially split a vm area into separate
+ * areas, each area with its own behavior.
+ */
+static long madvise_behavior(struct vm_area_struct * vma, unsigned long start,
+	unsigned long end, int behavior)
 {
+	struct mm_struct * mm = vma->vm_mm;
+	int error;
+
+	if (start != vma->vm_start) {
+		error = split_vma(mm, vma, start, 1);
+		if (error)
+			return -EAGAIN;
+	}
+
+	if (end != vma->vm_end) {
+		error = split_vma(mm, vma, end, 0);
+		if (error)
+			return -EAGAIN;
+	}
+
+	spin_lock(&mm->page_table_lock);
+	vma->vm_raend = 0;
 	VM_ClearReadHint(vma);
 	switch (behavior) {
@@ -25,134 +45,12 @@ static inline void setup_read_behavior(s
 	default:
 		break;
 	}
-}
-
-static long madvise_fixup_start(struct vm_area_struct * vma,
-	unsigned long end, int behavior)
-{
-	struct vm_area_struct * n;
-	struct mm_struct * mm = vma->vm_mm;
-
-	n = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
-	if (!n)
-		return -EAGAIN;
-	*n = *vma;
-	n->vm_end = end;
-	setup_read_behavior(n, behavior);
-	n->vm_raend = 0;
-	if (n->vm_file)
-		get_file(n->vm_file);
-	if (n->vm_ops && n->vm_ops->open)
-		n->vm_ops->open(n);
-	vma->vm_pgoff += (end - vma->vm_start) >> PAGE_SHIFT;
-	lock_vma_mappings(vma);
-	spin_lock(&mm->page_table_lock);
-	vma->vm_start = end;
-	__insert_vm_struct(mm, n);
-	spin_unlock(&mm->page_table_lock);
-	unlock_vma_mappings(vma);
-	return 0;
-}
-
-static long madvise_fixup_end(struct vm_area_struct * vma,
-	unsigned long start, int behavior)
-{
-	struct vm_area_struct * n;
-	struct mm_struct * mm = vma->vm_mm;
-
-	n = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-	if (!n)
-		return -EAGAIN;
-	*n = *vma;
-	n->vm_start = start;
-	n->vm_pgoff += (n->vm_start - vma->vm_start) >> PAGE_SHIFT;
-	setup_read_behavior(n, behavior);
-	n->vm_raend = 0;
-	if (n->vm_file)
-		get_file(n->vm_file);
-	if (n->vm_ops && n->vm_ops->open)
-		n->vm_ops->open(n);
-	lock_vma_mappings(vma);
-	spin_lock(&mm->page_table_lock);
-	vma->vm_end = start;
-	__insert_vm_struct(mm, n);
 	spin_unlock(&mm->page_table_lock);
-	unlock_vma_mappings(vma);
-	return 0;
-}
 
-static long madvise_fixup_middle(struct vm_area_struct * vma, unsigned long start,
-	unsigned long end, int behavior)
-{
-	struct vm_area_struct * left, * right;
-	struct mm_struct * mm = vma->vm_mm;
-
-	left = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-	if (!left)
-		return -EAGAIN;
-	right = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-	if (!right) {
-		kmem_cache_free(vm_area_cachep, left);
-		return -EAGAIN;
-	}
-	*left = *vma;
-	*right = *vma;
-	left->vm_end = start;
-	right->vm_start = end;
-	right->vm_pgoff += (right->vm_start - left->vm_start) >> PAGE_SHIFT;
-	left->vm_raend = 0;
-	right->vm_raend = 0;
-	if (vma->vm_file)
-		atomic_add(2, &vma->vm_file->f_count);
-
-	if (vma->vm_ops && vma->vm_ops->open) {
-		vma->vm_ops->open(left);
-		vma->vm_ops->open(right);
-	}
-	vma->vm_pgoff += (start - vma->vm_start) >> PAGE_SHIFT;
-	vma->vm_raend = 0;
-	lock_vma_mappings(vma);
-	spin_lock(&mm->page_table_lock);
-	vma->vm_start = start;
-	vma->vm_end = end;
-	setup_read_behavior(vma, behavior);
-	__insert_vm_struct(mm, left);
-	__insert_vm_struct(mm, right);
-	spin_unlock(&mm->page_table_lock);
-	unlock_vma_mappings(vma);
 	return 0;
 }
 
 /*
- * We can potentially split a vm area into separate
- * areas, each area with its own behavior.
- */
-static long madvise_behavior(struct vm_area_struct * vma, unsigned long start,
-	unsigned long end, int behavior)
-{
-	int error = 0;
-
-	/* This caps the number of vma's this process can own */
-	if (vma->vm_mm->map_count > MAX_MAP_COUNT)
-		return -ENOMEM;
-
-	if (start == vma->vm_start) {
-		if (end == vma->vm_end) {
-			setup_read_behavior(vma, behavior);
-			vma->vm_raend = 0;
-		} else
-			error = madvise_fixup_start(vma, end, behavior);
-	} else {
-		if (end == vma->vm_end)
-			error = madvise_fixup_end(vma, start, behavior);
-		else
-			error = madvise_fixup_middle(vma, start, end, behavior);
-	}
-
-	return error;
-}
-
-/*
  * Schedule all required I/O operations, then run the disk queue
  * to make sure they are started.  Do not wait for completion.
  */
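As a concrete illustration of the path the new madvise_behavior() takes (not part of the patch): when the advised range starts and ends inside a mapping, neither split_vma() call can be skipped, so one VMA becomes three and the read-ahead hint applies only to the middle one. A rough user-space sketch:

/*
 * Illustrative user-space program only.
 */
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	char *buf = mmap(NULL, 8 * page, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Hint only pages 2..5: start != vm_start and end != vm_end. */
	if (madvise(buf + 2 * page, 4 * page, MADV_SEQUENTIAL) < 0) {
		perror("madvise");
		return 1;
	}

	/* The single mapping is now backed by three VMAs. */
	printf("advised [%p, %p)\n",
	       (void *)(buf + 2 * page), (void *)(buf + 6 * page));

	munmap(buf, 8 * page);
	return 0;
}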
--- 2.5.33/mm/mlock.c~split-vma	Mon Sep 2 14:05:09 2002
+++ 2.5.33-akpm/mm/mlock.c	Mon Sep 2 14:05:09 2002
@@ -2,147 +2,49 @@
  * linux/mm/mlock.c
  *
  * (C) Copyright 1995  Linus Torvalds
+ * (C) Copyright 2002  Christoph Hellwig
  */
 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include 
-#include 
-
-static inline int mlock_fixup_all(struct vm_area_struct * vma, int newflags)
-{
-	spin_lock(&vma->vm_mm->page_table_lock);
-	vma->vm_flags = newflags;
-	spin_unlock(&vma->vm_mm->page_table_lock);
-	return 0;
-}
-
-static inline int mlock_fixup_start(struct vm_area_struct * vma,
-	unsigned long end, int newflags)
-{
-	struct vm_area_struct * n;
-
-	n = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-	if (!n)
-		return -EAGAIN;
-	*n = *vma;
-	n->vm_end = end;
-	n->vm_flags = newflags;
-	n->vm_raend = 0;
-	if (n->vm_file)
-		get_file(n->vm_file);
-	if (n->vm_ops && n->vm_ops->open)
-		n->vm_ops->open(n);
-	vma->vm_pgoff += (end - vma->vm_start) >> PAGE_SHIFT;
-	lock_vma_mappings(vma);
-	spin_lock(&vma->vm_mm->page_table_lock);
-	vma->vm_start = end;
-	__insert_vm_struct(current->mm, n);
-	spin_unlock(&vma->vm_mm->page_table_lock);
-	unlock_vma_mappings(vma);
-	return 0;
-}
-
-static inline int mlock_fixup_end(struct vm_area_struct * vma,
-	unsigned long start, int newflags)
-{
-	struct vm_area_struct * n;
-
-	n = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-	if (!n)
-		return -EAGAIN;
-	*n = *vma;
-	n->vm_start = start;
-	n->vm_pgoff += (n->vm_start - vma->vm_start) >> PAGE_SHIFT;
-	n->vm_flags = newflags;
-	n->vm_raend = 0;
-	if (n->vm_file)
-		get_file(n->vm_file);
-	if (n->vm_ops && n->vm_ops->open)
-		n->vm_ops->open(n);
-	lock_vma_mappings(vma);
-	spin_lock(&vma->vm_mm->page_table_lock);
-	vma->vm_end = start;
-	__insert_vm_struct(current->mm, n);
-	spin_unlock(&vma->vm_mm->page_table_lock);
-	unlock_vma_mappings(vma);
-	return 0;
-}
+#include 
+#include 
 
-static inline int mlock_fixup_middle(struct vm_area_struct * vma,
-	unsigned long start, unsigned long end, int newflags)
-{
-	struct vm_area_struct * left, * right;
-
-	left = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-	if (!left)
-		return -EAGAIN;
-	right = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-	if (!right) {
-		kmem_cache_free(vm_area_cachep, left);
-		return -EAGAIN;
-	}
-	*left = *vma;
-	*right = *vma;
-	left->vm_end = start;
-	right->vm_start = end;
-	right->vm_pgoff += (right->vm_start - left->vm_start) >> PAGE_SHIFT;
-	vma->vm_flags = newflags;
-	left->vm_raend = 0;
-	right->vm_raend = 0;
-	if (vma->vm_file)
-		atomic_add(2, &vma->vm_file->f_count);
-
-	if (vma->vm_ops && vma->vm_ops->open) {
-		vma->vm_ops->open(left);
-		vma->vm_ops->open(right);
-	}
-	vma->vm_raend = 0;
-	vma->vm_pgoff += (start - vma->vm_start) >> PAGE_SHIFT;
-	lock_vma_mappings(vma);
-	spin_lock(&vma->vm_mm->page_table_lock);
-	vma->vm_start = start;
-	vma->vm_end = end;
-	vma->vm_flags = newflags;
-	__insert_vm_struct(current->mm, left);
-	__insert_vm_struct(current->mm, right);
-	spin_unlock(&vma->vm_mm->page_table_lock);
-	unlock_vma_mappings(vma);
-	return 0;
-}
 
 static int mlock_fixup(struct vm_area_struct * vma, unsigned long start,
 	unsigned long end, unsigned int newflags)
 {
-	int pages, retval;
+	struct mm_struct * mm = vma->vm_mm;
+	int pages, error;
 
 	if (newflags == vma->vm_flags)
 		return 0;
 
-	if (start == vma->vm_start) {
-		if (end == vma->vm_end)
-			retval = mlock_fixup_all(vma, newflags);
-		else
-			retval = mlock_fixup_start(vma, end, newflags);
-	} else {
-		if (end == vma->vm_end)
-			retval = mlock_fixup_end(vma, start, newflags);
-		else
-			retval = mlock_fixup_middle(vma, start, end, newflags);
+	if (start != vma->vm_start) {
+		error = split_vma(mm, vma, start, 1);
+		if (error)
+			return -EAGAIN;
 	}
-	if (!retval) {
-		/* keep track of amount of locked VM */
-		pages = (end - start) >> PAGE_SHIFT;
-		if (newflags & VM_LOCKED) {
-			pages = -pages;
-			make_pages_present(start, end);
-		}
-		vma->vm_mm->locked_vm -= pages;
+
+	if (end != vma->vm_end) {
+		error = split_vma(mm, vma, end, 0);
+		if (error)
+			return -EAGAIN;
 	}
-	return retval;
+
+	spin_lock(&mm->page_table_lock);
+	vma->vm_flags = newflags;
+	spin_unlock(&mm->page_table_lock);
+
+	/*
+	 * Keep track of amount of locked VM.
+	 */
+	pages = (end - start) >> PAGE_SHIFT;
+	if (newflags & VM_LOCKED) {
+		pages = -pages;
+		make_pages_present(start, end);
+	}
+
+	vma->vm_mm->locked_vm -= pages;
+	return 0;
 }
 
 static int do_mlock(unsigned long start, size_t len, int on)
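The converted mlock_fixup() follows the same split-then-update pattern. A user-space sketch (again not part of the patch; it needs RLIMIT_MEMLOCK room or CAP_IPC_LOCK to succeed) that exercises both split_vma() calls and the locked_vm accounting, visible as VmLck in /proc/self/status:

/*
 * Illustrative user-space program only.
 */
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	char *buf = mmap(NULL, 4 * page, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Lock page 1 only: start != vm_start and end != vm_end. */
	if (mlock(buf + page, page) < 0) {
		perror("mlock");
		return 1;
	}

	printf("locked [%p, %p)\n",
	       (void *)(buf + page), (void *)(buf + 2 * page));

	munlock(buf + page, page);
	munmap(buf, 4 * page);
	return 0;
}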
--- 2.5.33/mm/mmap.c~split-vma	Mon Sep 2 14:05:09 2002
+++ 2.5.33-akpm/mm/mmap.c	Mon Sep 2 14:05:09 2002
@@ -1047,10 +1047,11 @@ static struct vm_area_struct *touched_by
 }
 
 /*
- * Split a vma into two pieces at address 'addr', the original vma
- * will contain the first part, a new vma is allocated for the tail.
+ * Split a vma into two pieces at address 'addr'; a new vma is allocated
+ * for either the first part or the tail.
  */
-static int splitvma(struct mm_struct *mm, struct vm_area_struct *mpnt, unsigned long addr)
+int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
+	unsigned long addr, int new_below)
 {
 	struct vm_area_struct *new;
 
@@ -1062,22 +1063,28 @@ static int splitvma(struct mm_struct *mm
 		return -ENOMEM;
 
 	/* most fields are the same, copy all, and then fixup */
-	*new = *mpnt;
+	*new = *vma;
+
+	if (new_below) {
+		new->vm_end = addr;
+		vma->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
+		vma->vm_start = addr;
+	} else {
+		new->vm_start = vma->vm_end = addr;
+		new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
+	}
 
-	new->vm_start = addr;
-	new->vm_pgoff = mpnt->vm_pgoff + ((addr - mpnt->vm_start) >> PAGE_SHIFT);
 	new->vm_raend = 0;
-	if (mpnt->vm_file)
-		get_file(mpnt->vm_file);
-	if (mpnt->vm_ops && mpnt->vm_ops->open)
-		mpnt->vm_ops->open(mpnt);
-	mpnt->vm_end = addr;	/* Truncate area */
+	if (new->vm_file)
+		get_file(new->vm_file);
+
+	if (new->vm_ops && new->vm_ops->open)
+		new->vm_ops->open(new);
 
 	spin_lock(&mm->page_table_lock);
-	lock_vma_mappings(mpnt);
+	lock_vma_mappings(vma);
 	__insert_vm_struct(mm, new);
-	unlock_vma_mappings(mpnt);
+	unlock_vma_mappings(vma);
 	spin_unlock(&mm->page_table_lock);
 
 	return 0;
@@ -1114,7 +1121,7 @@ int do_munmap(struct mm_struct *mm, unsi
 	 * If we need to split any vma, do it now to save pain later.
 	 */
 	if (start > mpnt->vm_start) {
-		if (splitvma(mm, mpnt, start))
+		if (split_vma(mm, mpnt, start, 0))
 			return -ENOMEM;
 		prev = mpnt;
 		mpnt = mpnt->vm_next;
@@ -1123,7 +1130,7 @@ int do_munmap(struct mm_struct *mm, unsi
 	/* Does it split the last one? */
 	last = find_vma(mm, end);
 	if (last && end > last->vm_start) {
-		if (splitvma(mm, last, end))
+		if (split_vma(mm, last, end, 0))
 			return -ENOMEM;
 	}
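do_munmap() already relied on this split when unmapping part of a VMA; with the rename it simply calls split_vma() with new_below set to 0. A user-space illustration (not part of the patch): punching a hole in the middle of a mapping makes the kernel split at both ends before dropping the hole, leaving two VMAs where there was one.

/*
 * Illustrative user-space program only.
 */
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	char *buf = mmap(NULL, 4 * page, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Punch a hole: pages 1 and 2 go away, pages 0 and 3 remain. */
	if (munmap(buf + page, 2 * page) < 0) {
		perror("munmap");
		return 1;
	}

	printf("hole at [%p, %p)\n",
	       (void *)(buf + page), (void *)(buf + 3 * page));

	munmap(buf, page);
	munmap(buf + 3 * page, page);
	return 0;
}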
--- 2.5.33/mm/mprotect.c~split-vma	Mon Sep 2 14:05:09 2002
+++ 2.5.33-akpm/mm/mprotect.c	Mon Sep 2 14:05:09 2002
@@ -2,13 +2,14 @@
  * mm/mprotect.c
  *
  * (C) Copyright 1994  Linus Torvalds
+ * (C) Copyright 2002  Christoph Hellwig
  *
  * Address space accounting code
  * (C) Copyright 2002 Red Hat Inc, All Rights Reserved
  */
+
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -100,158 +101,59 @@ static void change_protection(struct vm_
 	spin_unlock(&current->mm->page_table_lock);
 	return;
 }
-
-static inline int mprotect_fixup_all(struct vm_area_struct * vma, struct vm_area_struct ** pprev,
-	int newflags, pgprot_t prot)
+/*
+ * Try to merge the vma with the previous one's flags; return 1 if successful
+ * or 0 if it was impossible.
+ */
+static int mprotect_attempt_merge(struct vm_area_struct * vma,
+	struct vm_area_struct * prev,
+	unsigned long end, int newflags)
 {
-	struct vm_area_struct * prev = *pprev;
 	struct mm_struct * mm = vma->vm_mm;
 
-	if (prev && prev->vm_end == vma->vm_start && can_vma_merge(prev, newflags) &&
-	    !vma->vm_file && !(vma->vm_flags & VM_SHARED)) {
+	if (!prev || !vma)
+		return 0;
+	if (prev->vm_end != vma->vm_start)
+		return 0;
+	if (!can_vma_merge(prev, newflags))
+		return 0;
+	if (vma->vm_file || (vma->vm_flags & VM_SHARED))
+		return 0;
+
+	/*
+	 * If the whole area changes to the protection of the previous one
+	 * we can just get rid of it.
+	 */
+	if (end == vma->vm_end) {
 		spin_lock(&mm->page_table_lock);
-		prev->vm_end = vma->vm_end;
+		prev->vm_end = end;
 		__vma_unlink(mm, vma, prev);
 		spin_unlock(&mm->page_table_lock);
 
 		kmem_cache_free(vm_area_cachep, vma);
 		mm->map_count--;
+		return 1;
+	}
 
-		return 0;
-	}
-
+	/*
+	 * Otherwise extend it.
+	 */
 	spin_lock(&mm->page_table_lock);
-	vma->vm_flags = newflags;
-	vma->vm_page_prot = prot;
-	spin_unlock(&mm->page_table_lock);
-
-	*pprev = vma;
-
-	return 0;
-}
-
-static inline int mprotect_fixup_start(struct vm_area_struct * vma, struct vm_area_struct ** pprev,
-	unsigned long end,
-	int newflags, pgprot_t prot)
-{
-	struct vm_area_struct * n, * prev = *pprev;
-
-	*pprev = vma;
-
-	if (prev && prev->vm_end == vma->vm_start && can_vma_merge(prev, newflags) &&
-	    !vma->vm_file && !(vma->vm_flags & VM_SHARED)) {
-		spin_lock(&vma->vm_mm->page_table_lock);
-		prev->vm_end = end;
-		vma->vm_start = end;
-		spin_unlock(&vma->vm_mm->page_table_lock);
-
-		return 0;
-	}
-	n = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-	if (!n)
-		return -ENOMEM;
-	*n = *vma;
-	n->vm_end = end;
-	n->vm_flags = newflags;
-	n->vm_raend = 0;
-	n->vm_page_prot = prot;
-	if (n->vm_file)
-		get_file(n->vm_file);
-	if (n->vm_ops && n->vm_ops->open)
-		n->vm_ops->open(n);
-	vma->vm_pgoff += (end - vma->vm_start) >> PAGE_SHIFT;
-	lock_vma_mappings(vma);
-	spin_lock(&vma->vm_mm->page_table_lock);
+	prev->vm_end = end;
 	vma->vm_start = end;
-	__insert_vm_struct(current->mm, n);
-	spin_unlock(&vma->vm_mm->page_table_lock);
-	unlock_vma_mappings(vma);
-
-	return 0;
-}
-
-static inline int mprotect_fixup_end(struct vm_area_struct * vma, struct vm_area_struct ** pprev,
-	unsigned long start,
-	int newflags, pgprot_t prot)
-{
-	struct vm_area_struct * n;
-
-	n = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
-	if (!n)
-		return -ENOMEM;
-	*n = *vma;
-	n->vm_start = start;
-	n->vm_pgoff += (n->vm_start - vma->vm_start) >> PAGE_SHIFT;
-	n->vm_flags = newflags;
-	n->vm_raend = 0;
-	n->vm_page_prot = prot;
-	if (n->vm_file)
-		get_file(n->vm_file);
-	if (n->vm_ops && n->vm_ops->open)
-		n->vm_ops->open(n);
-	lock_vma_mappings(vma);
-	spin_lock(&vma->vm_mm->page_table_lock);
-	vma->vm_end = start;
-	__insert_vm_struct(current->mm, n);
-	spin_unlock(&vma->vm_mm->page_table_lock);
-	unlock_vma_mappings(vma);
-
-	*pprev = n;
-
-	return 0;
+	spin_unlock(&mm->page_table_lock);
+	return 1;
 }
 
-static inline int mprotect_fixup_middle(struct vm_area_struct * vma, struct vm_area_struct ** pprev,
-	unsigned long start, unsigned long end,
-	int newflags, pgprot_t prot)
-{
-	struct vm_area_struct * left, * right;
-
-	left = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-	if (!left)
-		return -ENOMEM;
-	right = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-	if (!right) {
-		kmem_cache_free(vm_area_cachep, left);
-		return -ENOMEM;
-	}
-	*left = *vma;
-	*right = *vma;
-	left->vm_end = start;
-	right->vm_start = end;
-	right->vm_pgoff += (right->vm_start - left->vm_start) >> PAGE_SHIFT;
-	left->vm_raend = 0;
-	right->vm_raend = 0;
-	if (vma->vm_file)
-		atomic_add(2,&vma->vm_file->f_count);
-	if (vma->vm_ops && vma->vm_ops->open) {
-		vma->vm_ops->open(left);
-		vma->vm_ops->open(right);
-	}
-	vma->vm_pgoff += (start - vma->vm_start) >> PAGE_SHIFT;
-	vma->vm_raend = 0;
-	vma->vm_page_prot = prot;
-	lock_vma_mappings(vma);
-	spin_lock(&vma->vm_mm->page_table_lock);
-	vma->vm_start = start;
-	vma->vm_end = end;
-	vma->vm_flags = newflags;
-	__insert_vm_struct(current->mm, left);
-	__insert_vm_struct(current->mm, right);
-	spin_unlock(&vma->vm_mm->page_table_lock);
-	unlock_vma_mappings(vma);
-
-	*pprev = right;
-	return 0;
-}
 
 static int mprotect_fixup(struct vm_area_struct * vma, struct vm_area_struct ** pprev,
 	unsigned long start, unsigned long end, unsigned int newflags)
 {
+	struct mm_struct * mm = vma->vm_mm;
+	unsigned long charged = 0;
 	pgprot_t newprot;
 	int error;
-	unsigned long charged = 0;
 
 	if (newflags == vma->vm_flags) {
 		*pprev = vma;
@@ -266,29 +168,46 @@ static int mprotect_fixup(struct vm_area
 	 * FIXME? We haven't defined a VM_NORESERVE flag, so mprotecting
 	 * a MAP_NORESERVE private mapping to writable will now reserve.
 	 */
-	if ((newflags & VM_WRITE) &&
-	    !(vma->vm_flags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))) {
-		charged = (end - start) >> PAGE_SHIFT;
-		if (!vm_enough_memory(charged))
-			return -ENOMEM;
-		newflags |= VM_ACCOUNT;
+	if (newflags & VM_WRITE) {
+		if (!(vma->vm_flags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))) {
+			charged = (end - start) >> PAGE_SHIFT;
+			if (!vm_enough_memory(charged))
+				return -ENOMEM;
+			newflags |= VM_ACCOUNT;
+		}
 	}
 
+	newprot = protection_map[newflags & 0xf];
+
 	if (start == vma->vm_start) {
-		if (end == vma->vm_end)
-			error = mprotect_fixup_all(vma, pprev, newflags, newprot);
-		else
-			error = mprotect_fixup_start(vma, pprev, end, newflags, newprot);
-	} else if (end == vma->vm_end)
-		error = mprotect_fixup_end(vma, pprev, start, newflags, newprot);
-	else
-		error = mprotect_fixup_middle(vma, pprev, start, end, newflags, newprot);
-	if (error) {
-		vm_unacct_memory(charged);
-		return error;
+		/*
+		 * Try to merge with the previous vma.
+		 */
+		if (mprotect_attempt_merge(vma, *pprev, end, newflags))
+			return 0;
+	} else {
+		error = split_vma(mm, vma, start, 1);
+		if (error)
+			goto fail;
+	}
+
+	if (end != vma->vm_end) {
+		error = split_vma(mm, vma, end, 0);
+		if (error)
+			goto fail;
 	}
+
+	spin_lock(&mm->page_table_lock);
+	vma->vm_flags = newflags;
+	vma->vm_page_prot = newprot;
+	spin_unlock(&mm->page_table_lock);
+
 	change_protection(vma, start, end, newprot);
 	return 0;
+
+fail:
+	vm_unacct_memory(charged);
+	return error;
 }
 
 asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot)
@@ -357,6 +276,7 @@ asmlinkage long sys_mprotect(unsigned lo
 			goto out;
 		}
 	}
+
 	if (next && prev->vm_end == next->vm_start && can_vma_merge(next, prev->vm_flags) &&
 	    !prev->vm_file && !(prev->vm_flags & VM_SHARED)) {
 		spin_lock(&prev->vm_mm->page_table_lock);
.
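Finally, a user-space sketch (not part of the patch) of the two mprotect_fixup() paths above: the first mprotect() changes an interior range and therefore needs both split_vma() calls; the second changes the resulting middle VMA back to the flags of the VMA in front of it, which is the case mprotect_attempt_merge() folds into the previous VMA.

/*
 * Illustrative user-space program only.
 */
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	char *buf = mmap(NULL, 4 * page, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Interior range: start != vm_start and end != vm_end. */
	if (mprotect(buf + page, 2 * page, PROT_READ) < 0) {
		perror("mprotect");
		return 1;
	}

	/* Same range back to read-write: mergeable with the previous VMA. */
	if (mprotect(buf + page, 2 * page, PROT_READ | PROT_WRITE) < 0) {
		perror("mprotect");
		return 1;
	}

	munmap(buf, 4 * page);
	return 0;
}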