From: "Ollie Wild" <aaw@google.com>
To: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: linux-kernel@vger.kernel.org,
parisc-linux@lists.parisc-linux.org,
Linus Torvalds <torvalds@osdl.org>,
Arjan van de Ven <arjan@infradead.org>,
Ingo Molnar <mingo@elte.hu>,
linux-mm@kvack.org, Andrew Morton <akpm@osdl.org>,
Andi Kleen <ak@muc.de>,
linux-arch@vger.kernel.org, David Howells <dhowells@redhat.com>
Subject: Re: Removing MAX_ARG_PAGES (request for comments/assistance)
Date: Wed, 11 Oct 2006 14:48:14 -0700 [thread overview]
Message-ID: <65dd6fd50610111448q7ff210e1nb5f14917c311c8d4@mail.gmail.com> (raw)
In-Reply-To: <1160572460.2006.79.camel@taijtu>
[-- Attachment #1: Type: text/plain, Size: 745 bytes --]
> Yeah, you'll need to change the PTEs for those pages you created by
> calling get_user_page() by calling an mprotect like function; perhaps
> something like:
Thanks. I've incorporated your changes (updated patch attached).
> > + /* Move stack pages down in memory. */
> > + if (stack_shift) {
> > + // FIXME: Verify the shift is OK.
> > +
>
> What exactly are you wondering about? the call to move_vma looks sane to
> me
My concern was that the binfmt handler may have setup other vm areas
which overlap the new range. The move_vma() function doesn't do
overlap checking. I'm not sure if this is something I need to guard
against, or if it falls in the "Don't do that!" category.
Ollie
[-- Attachment #2: no_MAX_ARG_PAGES.patch --]
[-- Type: application/octet-stream, Size: 27310 bytes --]
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index 82ef182..2e5a934 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -279,9 +279,6 @@ #define exit_elf_binfmt exit_elf32_bin
#define load_elf_binary load_elf32_binary
#define ELF_PLAT_INIT(r, load_addr) elf32_init(r)
-#define setup_arg_pages(bprm, stack_top, exec_stack) \
- ia32_setup_arg_pages(bprm, stack_top, exec_stack)
-int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int executable_stack);
#undef start_thread
#define start_thread(regs,new_rip,new_rsp) do { \
@@ -338,57 +335,7 @@ static void elf32_init(struct pt_regs *r
int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top,
int executable_stack)
{
- unsigned long stack_base;
- struct vm_area_struct *mpnt;
- struct mm_struct *mm = current->mm;
- int i, ret;
-
- stack_base = stack_top - MAX_ARG_PAGES * PAGE_SIZE;
- mm->arg_start = bprm->p + stack_base;
-
- bprm->p += stack_base;
- if (bprm->loader)
- bprm->loader += stack_base;
- bprm->exec += stack_base;
-
- mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
- if (!mpnt)
- return -ENOMEM;
-
- memset(mpnt, 0, sizeof(*mpnt));
-
- down_write(&mm->mmap_sem);
- {
- mpnt->vm_mm = mm;
- mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
- mpnt->vm_end = stack_top;
- if (executable_stack == EXSTACK_ENABLE_X)
- mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC;
- else if (executable_stack == EXSTACK_DISABLE_X)
- mpnt->vm_flags = VM_STACK_FLAGS & ~VM_EXEC;
- else
- mpnt->vm_flags = VM_STACK_FLAGS;
- mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC) ?
- PAGE_COPY_EXEC : PAGE_COPY;
- if ((ret = insert_vm_struct(mm, mpnt))) {
- up_write(&mm->mmap_sem);
- kmem_cache_free(vm_area_cachep, mpnt);
- return ret;
- }
- mm->stack_vm = mm->total_vm = vma_pages(mpnt);
- }
-
- for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
- struct page *page = bprm->page[i];
- if (page) {
- bprm->page[i] = NULL;
- install_arg_page(mpnt, page, stack_base);
- }
- stack_base += PAGE_SIZE;
- }
- up_write(&mm->mmap_sem);
-
- return 0;
+ return setup_arg_pages(bprm, stack_top, executable_stack);
}
EXPORT_SYMBOL(ia32_setup_arg_pages);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 06435f3..d0e2292 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -255,8 +255,8 @@ #endif
while (argc-- > 0) {
size_t len;
__put_user((elf_addr_t)p, argv++);
- len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
- if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
+ len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
+ if (!len || len > MAX_ARG_STRLEN)
return 0;
p += len;
}
@@ -266,8 +266,8 @@ #endif
while (envc-- > 0) {
size_t len;
__put_user((elf_addr_t)p, envp++);
- len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
- if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
+ len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
+ if (!len || len > MAX_ARG_STRLEN)
return 0;
p += len;
}
@@ -769,10 +769,6 @@ static int load_elf_binary(struct linux_
}
/* OK, This is the point of no return */
- current->mm->start_data = 0;
- current->mm->end_data = 0;
- current->mm->end_code = 0;
- current->mm->mmap = NULL;
current->flags &= ~PF_FORKNOEXEC;
current->mm->def_flags = def_flags;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 1713c48..1378ba3 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -126,7 +126,9 @@ static int load_misc_binary(struct linux
goto _ret;
if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) {
- remove_arg_zero(bprm);
+ retval = remove_arg_zero(bprm);
+ if (retval)
+ goto _ret;
}
if (fmt->flags & MISC_FMT_OPEN_BINARY) {
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 1edbcca..5b6309f 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -68,7 +68,9 @@ static int load_script(struct linux_binp
* This is done in reverse order, because of how the
* user environment and arguments are stored.
*/
- remove_arg_zero(bprm);
+ retval = remove_arg_zero(bprm);
+ if (retval)
+ return retval;
retval = copy_strings_kernel(1, &bprm->interp, bprm);
if (retval < 0) return retval;
bprm->argc++;
diff --git a/fs/compat.c b/fs/compat.c
index 50624d4..359836b 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1385,6 +1385,7 @@ static int compat_copy_strings(int argc,
{
struct page *kmapped_page = NULL;
char *kaddr = NULL;
+ unsigned long kpos = 0;
int ret;
while (argc-- > 0) {
@@ -1393,92 +1394,72 @@ static int compat_copy_strings(int argc,
unsigned long pos;
if (get_user(str, argv+argc) ||
- !(len = strnlen_user(compat_ptr(str), bprm->p))) {
+ !(len = strnlen_user(compat_ptr(str), MAX_ARG_STRLEN))) {
ret = -EFAULT;
goto out;
}
- if (bprm->p < len) {
+ if (MAX_ARG_STRLEN < len) {
ret = -E2BIG;
goto out;
}
- bprm->p -= len;
- /* XXX: add architecture specific overflow check here. */
+ /* We're going to work our way backwords. */
pos = bprm->p;
+ str += len;
+ bprm->p -= len;
while (len > 0) {
- int i, new, err;
int offset, bytes_to_copy;
- struct page *page;
offset = pos % PAGE_SIZE;
- i = pos/PAGE_SIZE;
- page = bprm->page[i];
- new = 0;
- if (!page) {
- page = alloc_page(GFP_HIGHUSER);
- bprm->page[i] = page;
- if (!page) {
- ret = -ENOMEM;
+ if (offset == 0)
+ offset = PAGE_SIZE;
+
+ bytes_to_copy = offset;
+ if (bytes_to_copy > len)
+ bytes_to_copy = len;
+
+ offset -= bytes_to_copy;
+ pos -= bytes_to_copy;
+ str -= bytes_to_copy;
+ len -= bytes_to_copy;
+
+ if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
+ struct page *page;
+
+ ret = get_user_pages(current, bprm->mm, pos,
+ 1, 1, 1, &page, NULL);
+ if (ret <= 0) {
+ /* We've exceed the stack rlimit. */
+ ret = -E2BIG;
goto out;
}
- new = 1;
- }
- if (page != kmapped_page) {
- if (kmapped_page)
+ if (kmapped_page) {
kunmap(kmapped_page);
+ put_page(kmapped_page);
+ }
kmapped_page = page;
kaddr = kmap(kmapped_page);
+ kpos = pos & PAGE_MASK;
}
- if (new && offset)
- memset(kaddr, 0, offset);
- bytes_to_copy = PAGE_SIZE - offset;
- if (bytes_to_copy > len) {
- bytes_to_copy = len;
- if (new)
- memset(kaddr+offset+len, 0,
- PAGE_SIZE-offset-len);
- }
- err = copy_from_user(kaddr+offset, compat_ptr(str),
- bytes_to_copy);
- if (err) {
+ if (copy_from_user(kaddr+offset, compat_ptr(str),
+ bytes_to_copy)) {
ret = -EFAULT;
goto out;
}
-
- pos += bytes_to_copy;
- str += bytes_to_copy;
- len -= bytes_to_copy;
}
}
ret = 0;
out:
- if (kmapped_page)
+ if (kmapped_page) {
kunmap(kmapped_page);
- return ret;
-}
-
-#ifdef CONFIG_MMU
-
-#define free_arg_pages(bprm) do { } while (0)
-
-#else
-
-static inline void free_arg_pages(struct linux_binprm *bprm)
-{
- int i;
-
- for (i = 0; i < MAX_ARG_PAGES; i++) {
- if (bprm->page[i])
- __free_page(bprm->page[i]);
- bprm->page[i] = NULL;
+ put_page(kmapped_page);
}
+ return ret;
}
-#endif /* CONFIG_MMU */
-
/*
* compat_do_execve() is mostly a copy of do_execve(), with the exception
* that it processes 32 bit argv and envp pointers.
@@ -1491,7 +1472,6 @@ int compat_do_execve(char * filename,
struct linux_binprm *bprm;
struct file *file;
int retval;
- int i;
retval = -ENOMEM;
bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
@@ -1505,24 +1485,19 @@ int compat_do_execve(char * filename,
sched_exec();
- bprm->p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
bprm->file = file;
bprm->filename = filename;
bprm->interp = filename;
- bprm->mm = mm_alloc();
- retval = -ENOMEM;
- if (!bprm->mm)
- goto out_file;
- retval = init_new_context(current, bprm->mm);
- if (retval < 0)
- goto out_mm;
+ retval = bprm_mm_init(bprm);
+ if (retval)
+ goto out_file;
- bprm->argc = compat_count(argv, bprm->p / sizeof(compat_uptr_t));
+ bprm->argc = compat_count(argv, MAX_ARG_STRINGS);
if ((retval = bprm->argc) < 0)
goto out_mm;
- bprm->envc = compat_count(envp, bprm->p / sizeof(compat_uptr_t));
+ bprm->envc = compat_count(envp, MAX_ARG_STRINGS);
if ((retval = bprm->envc) < 0)
goto out_mm;
@@ -1547,10 +1522,8 @@ int compat_do_execve(char * filename,
if (retval < 0)
goto out;
- retval = search_binary_handler(bprm, regs);
+ retval = search_binary_handler(bprm,regs);
if (retval >= 0) {
- free_arg_pages(bprm);
-
/* execve success */
security_bprm_free(bprm);
acct_update_integrals(current);
@@ -1559,19 +1532,12 @@ int compat_do_execve(char * filename,
}
out:
- /* Something went wrong, return the inode and free the argument pages*/
- for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
- struct page * page = bprm->page[i];
- if (page)
- __free_page(page);
- }
-
if (bprm->security)
security_bprm_free(bprm);
out_mm:
if (bprm->mm)
- mmdrop(bprm->mm);
+ mmput (bprm->mm);
out_file:
if (bprm->file) {
diff --git a/fs/exec.c b/fs/exec.c
index d993ea1..3739c26 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -172,6 +172,79 @@ exit:
goto out;
}
+#ifdef CONFIG_STACK_GROWSUP
+#error I broke your build because I rearchitected the stack code, and I \
+ don't have access to an architecture where CONFIG_STACK_GROWSUP is \
+ set. Please fixe this or send me a machine which I can test this on. \
+ \
+ -- Ollie Wild <aaw@google.com>
+#endif
+
+/* Create a new mm_struct and populate it with a temporary stack
+ * vm_area_struct. We don't have enough context at this point to set the
+ * stack flags, permissions, and offset, so we use temporary values. We'll
+ * update them later in setup_arg_pages(). */
+int bprm_mm_init(struct linux_binprm *bprm)
+{
+ int err;
+ struct mm_struct *mm = NULL;
+ struct vm_area_struct *vma = NULL;
+
+ bprm->mm = mm = mm_alloc();
+ err = -ENOMEM;
+ if (!mm)
+ goto err;
+
+ if ((err = init_new_context(current, mm)))
+ goto err;
+
+ bprm->vma = vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL);
+ err = -ENOMEM;
+ if (!vma)
+ goto err;
+
+ down_write(&mm->mmap_sem);
+ {
+ vma->vm_mm = mm;
+
+ /* Place the stack at the top of user memory. Later, we'll
+ * move this to an appropriate place. We don't use STACK_TOP
+ * because that can depend on attributes which aren't
+ * configured yet. */
+ vma->vm_end = TASK_SIZE;
+ vma->vm_start = vma->vm_end - PAGE_SIZE;
+
+ vma->vm_flags = VM_STACK_FLAGS;
+ vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
+ if ((err = insert_vm_struct(mm, vma))) {
+ up_write(&mm->mmap_sem);
+ goto err;
+ }
+
+ mm->stack_vm = mm->total_vm = 1;
+ }
+ up_write(&mm->mmap_sem);
+
+ bprm->p = vma->vm_end - sizeof(void *);
+
+ return 0;
+
+err:
+ if (vma) {
+ bprm->vma = NULL;
+ kmem_cache_free(vm_area_cachep, vma);
+ }
+
+ if (mm) {
+ bprm->mm = NULL;
+ mmdrop(mm);
+ }
+
+ return err;
+}
+
+EXPORT_SYMBOL(bprm_mm_init);
+
/*
* count() counts the number of strings in array ARGV.
*/
@@ -197,15 +270,16 @@ static int count(char __user * __user *
}
/*
- * 'copy_strings()' copies argument/environment strings from user
- * memory to free pages in kernel mem. These are in a format ready
- * to be put directly into the top of new user memory.
+ * 'copy_strings()' copies argument/environment strings from the old
+ * processes's memory to the new process's stack. The call to get_user_pages()
+ * ensures the destination page is created and not swapped out.
*/
static int copy_strings(int argc, char __user * __user * argv,
struct linux_binprm *bprm)
{
struct page *kmapped_page = NULL;
char *kaddr = NULL;
+ unsigned long kpos = 0;
int ret;
while (argc-- > 0) {
@@ -214,69 +288,68 @@ static int copy_strings(int argc, char _
unsigned long pos;
if (get_user(str, argv+argc) ||
- !(len = strnlen_user(str, bprm->p))) {
+ !(len = strnlen_user(str, MAX_ARG_STRLEN))) {
ret = -EFAULT;
goto out;
}
- if (bprm->p < len) {
+ if (MAX_ARG_STRLEN < len) {
ret = -E2BIG;
goto out;
}
- bprm->p -= len;
- /* XXX: add architecture specific overflow check here. */
+ /* We're going to work our way backwords. */
pos = bprm->p;
+ str += len;
+ bprm->p -= len;
while (len > 0) {
- int i, new, err;
int offset, bytes_to_copy;
- struct page *page;
offset = pos % PAGE_SIZE;
- i = pos/PAGE_SIZE;
- page = bprm->page[i];
- new = 0;
- if (!page) {
- page = alloc_page(GFP_HIGHUSER);
- bprm->page[i] = page;
- if (!page) {
- ret = -ENOMEM;
+ if (offset == 0)
+ offset = PAGE_SIZE;
+
+ bytes_to_copy = offset;
+ if (bytes_to_copy > len)
+ bytes_to_copy = len;
+
+ offset -= bytes_to_copy;
+ pos -= bytes_to_copy;
+ str -= bytes_to_copy;
+ len -= bytes_to_copy;
+
+ if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
+ struct page *page;
+
+ ret = get_user_pages(current, bprm->mm, pos,
+ 1, 1, 1, &page, NULL);
+ if (ret <= 0) {
+ /* We've exceed the stack rlimit. */
+ ret = -E2BIG;
goto out;
}
- new = 1;
- }
- if (page != kmapped_page) {
- if (kmapped_page)
+ if (kmapped_page) {
kunmap(kmapped_page);
+ put_page(kmapped_page);
+ }
kmapped_page = page;
kaddr = kmap(kmapped_page);
+ kpos = pos & PAGE_MASK;
}
- if (new && offset)
- memset(kaddr, 0, offset);
- bytes_to_copy = PAGE_SIZE - offset;
- if (bytes_to_copy > len) {
- bytes_to_copy = len;
- if (new)
- memset(kaddr+offset+len, 0,
- PAGE_SIZE-offset-len);
- }
- err = copy_from_user(kaddr+offset, str, bytes_to_copy);
- if (err) {
+ if (copy_from_user(kaddr+offset, str, bytes_to_copy)) {
ret = -EFAULT;
goto out;
}
-
- pos += bytes_to_copy;
- str += bytes_to_copy;
- len -= bytes_to_copy;
}
}
ret = 0;
out:
- if (kmapped_page)
+ if (kmapped_page) {
kunmap(kmapped_page);
+ put_page(kmapped_page);
+ }
return ret;
}
@@ -295,157 +368,79 @@ int copy_strings_kernel(int argc,char **
EXPORT_SYMBOL(copy_strings_kernel);
-#ifdef CONFIG_MMU
-/*
- * This routine is used to map in a page into an address space: needed by
- * execve() for the initial stack and environment pages.
- *
- * vma->vm_mm->mmap_sem is held for writing.
- */
-void install_arg_page(struct vm_area_struct *vma,
- struct page *page, unsigned long address)
-{
- struct mm_struct *mm = vma->vm_mm;
- pte_t * pte;
- spinlock_t *ptl;
-
- if (unlikely(anon_vma_prepare(vma)))
- goto out;
-
- flush_dcache_page(page);
- pte = get_locked_pte(mm, address, &ptl);
- if (!pte)
- goto out;
- if (!pte_none(*pte)) {
- pte_unmap_unlock(pte, ptl);
- goto out;
- }
- inc_mm_counter(mm, anon_rss);
- lru_cache_add_active(page);
- set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte(
- page, vma->vm_page_prot))));
- page_add_new_anon_rmap(page, vma, address);
- pte_unmap_unlock(pte, ptl);
-
- /* no need for flush_tlb */
- return;
-out:
- __free_page(page);
- force_sig(SIGKILL, current);
-}
-
#define EXTRA_STACK_VM_PAGES 20 /* random */
+/* Finalizes the stack vm_area_struct. The flags and permissions are updated,
+ * the stack is optionally relocated, and some extra space is added.
+ */
int setup_arg_pages(struct linux_binprm *bprm,
unsigned long stack_top,
int executable_stack)
{
- unsigned long stack_base;
- struct vm_area_struct *mpnt;
+ unsigned long ret;
+ unsigned long stack_base, stack_shift;
struct mm_struct *mm = current->mm;
- int i, ret;
- long arg_size;
-#ifdef CONFIG_STACK_GROWSUP
- /* Move the argument and environment strings to the bottom of the
- * stack space.
- */
- int offset, j;
- char *to, *from;
-
- /* Start by shifting all the pages down */
- i = 0;
- for (j = 0; j < MAX_ARG_PAGES; j++) {
- struct page *page = bprm->page[j];
- if (!page)
- continue;
- bprm->page[i++] = page;
- }
+ BUG_ON(stack_top > TASK_SIZE);
+ BUG_ON(stack_top & ~PAGE_MASK);
- /* Now move them within their pages */
- offset = bprm->p % PAGE_SIZE;
- to = kmap(bprm->page[0]);
- for (j = 1; j < i; j++) {
- memmove(to, to + offset, PAGE_SIZE - offset);
- from = kmap(bprm->page[j]);
- memcpy(to + PAGE_SIZE - offset, from, offset);
- kunmap(bprm->page[j - 1]);
- to = from;
- }
- memmove(to, to + offset, PAGE_SIZE - offset);
- kunmap(bprm->page[j - 1]);
-
- /* Limit stack size to 1GB */
- stack_base = current->signal->rlim[RLIMIT_STACK].rlim_max;
- if (stack_base > (1 << 30))
- stack_base = 1 << 30;
- stack_base = PAGE_ALIGN(stack_top - stack_base);
-
- /* Adjust bprm->p to point to the end of the strings. */
- bprm->p = stack_base + PAGE_SIZE * i - offset;
-
- mm->arg_start = stack_base;
- arg_size = i << PAGE_SHIFT;
-
- /* zero pages that were copied above */
- while (i < MAX_ARG_PAGES)
- bprm->page[i++] = NULL;
-#else
- stack_base = arch_align_stack(stack_top - MAX_ARG_PAGES*PAGE_SIZE);
+ stack_base = arch_align_stack(stack_top - mm->stack_vm*PAGE_SIZE);
stack_base = PAGE_ALIGN(stack_base);
- bprm->p += stack_base;
- mm->arg_start = bprm->p;
- arg_size = stack_top - (PAGE_MASK & (unsigned long) mm->arg_start);
-#endif
- arg_size += EXTRA_STACK_VM_PAGES * PAGE_SIZE;
+ stack_shift = (bprm->p & PAGE_MASK) - stack_base;
+ BUG_ON(stack_shift < 0);
+ bprm->p -= stack_shift;
+ mm->arg_start = bprm->p;
if (bprm->loader)
- bprm->loader += stack_base;
- bprm->exec += stack_base;
-
- mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
- if (!mpnt)
- return -ENOMEM;
-
- memset(mpnt, 0, sizeof(*mpnt));
+ bprm->loader -= stack_shift;
+ bprm->exec -= stack_shift;
down_write(&mm->mmap_sem);
{
- mpnt->vm_mm = mm;
-#ifdef CONFIG_STACK_GROWSUP
- mpnt->vm_start = stack_base;
- mpnt->vm_end = stack_base + arg_size;
-#else
- mpnt->vm_end = stack_top;
- mpnt->vm_start = mpnt->vm_end - arg_size;
-#endif
+ struct vm_area_struct *vma = bprm->vma;
+ struct vm_area_struct *prev = NULL;
+ unsigned long vm_flags = vma->vm_flags;
+
/* Adjust stack execute permissions; explicitly enable
* for EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X
* and leave alone (arch default) otherwise. */
if (unlikely(executable_stack == EXSTACK_ENABLE_X))
- mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC;
+ vm_flags |= VM_EXEC;
else if (executable_stack == EXSTACK_DISABLE_X)
- mpnt->vm_flags = VM_STACK_FLAGS & ~VM_EXEC;
- else
- mpnt->vm_flags = VM_STACK_FLAGS;
- mpnt->vm_flags |= mm->def_flags;
- mpnt->vm_page_prot = protection_map[mpnt->vm_flags & 0x7];
- if ((ret = insert_vm_struct(mm, mpnt))) {
+ vm_flags &= ~VM_EXEC;
+ vm_flags |= mm->def_flags;
+
+ ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end,
+ vm_flags);
+ if (ret) {
up_write(&mm->mmap_sem);
- kmem_cache_free(vm_area_cachep, mpnt);
return ret;
}
- mm->stack_vm = mm->total_vm = vma_pages(mpnt);
- }
+ BUG_ON(prev != vma);
+
+ /* Move stack pages down in memory. */
+ if (stack_shift) {
+ /* This should be safe even with overlap because we
+ * are shifting down. */
+ ret = move_vma(vma, vma->vm_start,
+ vma->vm_end - vma->vm_start,
+ vma->vm_end - vma->vm_start,
+ vma->vm_start - stack_shift);
+ if (ret & ~PAGE_MASK) {
+ up_write(&mm->mmap_sem);
+ return ret;
+ }
+ }
- for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
- struct page *page = bprm->page[i];
- if (page) {
- bprm->page[i] = NULL;
- install_arg_page(mpnt, page, stack_base);
+ // Expand the stack.
+ vma = find_vma(mm, bprm->p);
+ BUG_ON(!vma || bprm->p < vma->vm_start);
+ if (expand_stack(vma, stack_base -
+ EXTRA_STACK_VM_PAGES * PAGE_SIZE)) {
+ up_write(&mm->mmap_sem);
+ return -EFAULT;
}
- stack_base += PAGE_SIZE;
}
up_write(&mm->mmap_sem);
@@ -454,23 +449,6 @@ #endif
EXPORT_SYMBOL(setup_arg_pages);
-#define free_arg_pages(bprm) do { } while (0)
-
-#else
-
-static inline void free_arg_pages(struct linux_binprm *bprm)
-{
- int i;
-
- for (i = 0; i < MAX_ARG_PAGES; i++) {
- if (bprm->page[i])
- __free_page(bprm->page[i]);
- bprm->page[i] = NULL;
- }
-}
-
-#endif /* CONFIG_MMU */
-
struct file *open_exec(const char *name)
{
struct nameidata nd;
@@ -986,8 +964,10 @@ void compute_creds(struct linux_binprm *
EXPORT_SYMBOL(compute_creds);
-void remove_arg_zero(struct linux_binprm *bprm)
+int remove_arg_zero(struct linux_binprm *bprm)
{
+ int ret = 0;
+
if (bprm->argc) {
unsigned long offset;
char * kaddr;
@@ -1001,13 +981,23 @@ void remove_arg_zero(struct linux_binprm
continue;
offset = 0;
kunmap_atomic(kaddr, KM_USER0);
+ put_page(page);
inside:
- page = bprm->page[bprm->p/PAGE_SIZE];
+ ret = get_user_pages(current, bprm->mm, bprm->p,
+ 1, 0, 1, &page, NULL);
+ if (ret <= 0) {
+ ret = -EFAULT;
+ goto out;
+ }
kaddr = kmap_atomic(page, KM_USER0);
}
kunmap_atomic(kaddr, KM_USER0);
bprm->argc--;
+ ret = 0;
}
+
+out:
+ return ret;
}
EXPORT_SYMBOL(remove_arg_zero);
@@ -1034,7 +1024,7 @@ #ifdef __alpha__
fput(bprm->file);
bprm->file = NULL;
- loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
+ loader = bprm->vma->vm_end - sizeof(void *);
file = open_exec("/sbin/loader");
retval = PTR_ERR(file);
@@ -1127,7 +1117,6 @@ int do_execve(char * filename,
struct linux_binprm *bprm;
struct file *file;
int retval;
- int i;
retval = -ENOMEM;
bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
@@ -1141,25 +1130,19 @@ int do_execve(char * filename,
sched_exec();
- bprm->p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
-
bprm->file = file;
bprm->filename = filename;
bprm->interp = filename;
- bprm->mm = mm_alloc();
- retval = -ENOMEM;
- if (!bprm->mm)
- goto out_file;
- retval = init_new_context(current, bprm->mm);
- if (retval < 0)
- goto out_mm;
+ retval = bprm_mm_init(bprm);
+ if (retval)
+ goto out_file;
- bprm->argc = count(argv, bprm->p / sizeof(void *));
+ bprm->argc = count(argv, MAX_ARG_STRINGS);
if ((retval = bprm->argc) < 0)
goto out_mm;
- bprm->envc = count(envp, bprm->p / sizeof(void *));
+ bprm->envc = count(envp, MAX_ARG_STRINGS);
if ((retval = bprm->envc) < 0)
goto out_mm;
@@ -1186,8 +1169,6 @@ int do_execve(char * filename,
retval = search_binary_handler(bprm,regs);
if (retval >= 0) {
- free_arg_pages(bprm);
-
/* execve success */
security_bprm_free(bprm);
acct_update_integrals(current);
@@ -1196,19 +1177,12 @@ int do_execve(char * filename,
}
out:
- /* Something went wrong, return the inode and free the argument pages*/
- for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
- struct page * page = bprm->page[i];
- if (page)
- __free_page(page);
- }
-
if (bprm->security)
security_bprm_free(bprm);
out_mm:
if (bprm->mm)
- mmdrop(bprm->mm);
+ mmput (bprm->mm);
out_file:
if (bprm->file) {
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index c1e82c5..3e11591 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -5,12 +5,9 @@ #include <linux/capability.h>
struct pt_regs;
-/*
- * MAX_ARG_PAGES defines the number of pages allocated for arguments
- * and envelope for the new program. 32 should suffice, this gives
- * a maximum env+arg of 128kB w/4KB pages!
- */
-#define MAX_ARG_PAGES 32
+/* FIXME: Find real limits, or none. */
+#define MAX_ARG_STRLEN (PAGE_SIZE * 32)
+#define MAX_ARG_STRINGS 0x7FFFFFFF
/* sizeof(linux_binprm->buf) */
#define BINPRM_BUF_SIZE 128
@@ -22,7 +19,7 @@ #ifdef __KERNEL__
*/
struct linux_binprm{
char buf[BINPRM_BUF_SIZE];
- struct page *page[MAX_ARG_PAGES];
+ struct vm_area_struct *vma;
struct mm_struct *mm;
unsigned long p; /* current top of mem */
int sh_bang;
@@ -65,7 +62,7 @@ extern int register_binfmt(struct linux_
extern int unregister_binfmt(struct linux_binfmt *);
extern int prepare_binprm(struct linux_binprm *);
-extern void remove_arg_zero(struct linux_binprm *);
+extern int __must_check remove_arg_zero(struct linux_binprm *);
extern int search_binary_handler(struct linux_binprm *,struct pt_regs *);
extern int flush_old_exec(struct linux_binprm * bprm);
@@ -82,6 +79,7 @@ #define EXSTACK_ENABLE_X 2 /* Enable ex
extern int setup_arg_pages(struct linux_binprm * bprm,
unsigned long stack_top,
int executable_stack);
+extern int bprm_mm_init(struct linux_binprm *bprm);
extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm);
extern void compute_creds(struct linux_binprm *binprm);
extern int do_coredump(long signr, int exit_code, struct pt_regs * regs);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2614662..bb1d4c9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -738,7 +738,6 @@ #endif
extern int make_pages_present(unsigned long addr, unsigned long end);
extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
-void install_arg_page(struct vm_area_struct *, struct page *, unsigned long);
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
@@ -754,9 +753,15 @@ int FASTCALL(set_page_dirty(struct page
int set_page_dirty_lock(struct page *page);
int clear_page_dirty_for_io(struct page *page);
+extern unsigned long move_vma(struct vm_area_struct *vma,
+ unsigned long old_addr, unsigned long old_len,
+ unsigned long new_len, unsigned long new_addr);
extern unsigned long do_mremap(unsigned long addr,
unsigned long old_len, unsigned long new_len,
unsigned long flags, unsigned long new_addr);
+extern int mprotect_fixup(struct vm_area_struct *vma,
+ struct vm_area_struct **pprev, unsigned long start,
+ unsigned long end, unsigned long newflags);
/*
* Prototype to add a shrinker callback for ageable caches.
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 42f2f11..d25c73e 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1748,6 +1748,10 @@ int __audit_ipc_set_perm(unsigned long q
int audit_bprm(struct linux_binprm *bprm)
{
+ /* FIXME: Don't do anything for now until I figure out how to handle
+ * this. With the latest changes, kmalloc could well fail under good
+ * scenarios. */
+#if 0
struct audit_aux_data_execve *ax;
struct audit_context *context = current->audit_context;
unsigned long p, next;
@@ -1775,6 +1779,7 @@ int audit_bprm(struct linux_binprm *bprm
ax->d.type = AUDIT_EXECVE;
ax->d.next = context->aux;
context->aux = (void *)ax;
+#endif
return 0;
}
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 3b8f3c0..e8346c3 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -128,7 +128,7 @@ static void change_protection(struct vm_
flush_tlb_range(vma, start, end);
}
-static int
+int
mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
unsigned long start, unsigned long end, unsigned long newflags)
{
diff --git a/mm/mremap.c b/mm/mremap.c
index 9c769fa..957b6af 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -155,7 +155,7 @@ static unsigned long move_page_tables(st
return len + old_addr - old_end; /* how much done */
}
-static unsigned long move_vma(struct vm_area_struct *vma,
+unsigned long move_vma(struct vm_area_struct *vma,
unsigned long old_addr, unsigned long old_len,
unsigned long new_len, unsigned long new_addr)
{
next prev parent reply other threads:[~2006-10-11 21:48 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-10-11 0:05 Ollie Wild
2006-10-11 8:00 ` Arjan van de Ven
2006-10-11 17:13 ` Ollie Wild
2006-10-11 13:14 ` Peter Zijlstra
2006-10-11 21:48 ` Ollie Wild [this message]
2006-10-24 17:48 ` Ollie Wild
2006-12-29 20:03 ` [patch] remove MAX_ARG_PAGES Ingo Molnar
2006-12-29 20:49 ` Russell King
2006-12-29 21:23 ` Linus Torvalds
2006-12-29 22:20 ` Ingo Molnar
2006-12-29 21:57 ` Ingo Molnar
2007-01-01 6:51 ` Ollie Wild
2007-01-01 6:58 ` Ollie Wild
2007-01-01 17:28 ` Pavel Machek
2007-01-02 18:18 ` David Howells
2007-01-02 17:52 ` Removing MAX_ARG_PAGES (request for comments/assistance) David Howells
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=65dd6fd50610111448q7ff210e1nb5f14917c311c8d4@mail.gmail.com \
--to=aaw@google.com \
--cc=a.p.zijlstra@chello.nl \
--cc=ak@muc.de \
--cc=akpm@osdl.org \
--cc=arjan@infradead.org \
--cc=dhowells@redhat.com \
--cc=linux-arch@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mingo@elte.hu \
--cc=parisc-linux@lists.parisc-linux.org \
--cc=torvalds@osdl.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox