From: "Ollie Wild" <aaw@google.com>
To: Peter Zijlstra <a.p.zijlstra@chello.nl>,
linux-kernel@vger.kernel.org,
parisc-linux@lists.parisc-linux.org, linux-mm@kvack.org,
linux-arch@vger.kernel.org, Andrew Morton <akpm@osdl.org>,
Ingo Molnar <mingo@elte.hu>, Andi Kleen <ak@suse.de>
Subject: [patch] removes MAX_ARG_PAGES
Date: Sun, 6 May 2007 01:51:34 -0700 [thread overview]
Message-ID: <65dd6fd50705060151m78bb9b4fpcb941b16a8c4709e@mail.gmail.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 903 bytes --]
A while back, I sent out a preliminary patch
(http://thread.gmane.org/gmane.linux.ports.hppa/752) to remove the
MAX_ARG_PAGES limit on command line sizes. Since then, Peter Zijlstra
and I have fixed a number of bugs and addressed the various
outstanding issues.
The attached patch incorporates the following changes:
- Fixes a BUG_ON() assertion failure discovered by Ingo Molnar.
- Adds CONFIG_STACK_GROWSUP (parisc) support.
- Adds auditing support.
- Reverts to the old behavior on architectures with no MMU.
- Fixes broken execution of 64-bit binaries from 32-bit binaries.
- Adds elf_fdpic support.
- Fixes cache coherency bugs.
We've tested the following architectures: i386, x86_64, um/i386,
parisc, and frv. These are representative of the various scenarios
which this patch addresses, but other architecture teams should try it
out to make sure there aren't any unexpected gotchas.
Ollie
[-- Attachment #2: no_MAX_ARG_PAGES.patch --]
[-- Type: text/x-patch, Size: 47061 bytes --]
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 26f15c4..022043e 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -60,8 +60,6 @@ int handle_page_fault(unsigned long address, unsigned long ip,
goto good_area;
else if(!(vma->vm_flags & VM_GROWSDOWN))
goto out;
- else if(is_user && !ARCH_IS_STACKGROW(address))
- goto out;
else if(expand_stack(vma, address))
goto out;
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index 185399b..a2731fc 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -232,9 +232,6 @@ do { \
#define load_elf_binary load_elf32_binary
#define ELF_PLAT_INIT(r, load_addr) elf32_init(r)
-#define setup_arg_pages(bprm, stack_top, exec_stack) \
- ia32_setup_arg_pages(bprm, stack_top, exec_stack)
-int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int executable_stack);
#undef start_thread
#define start_thread(regs,new_rip,new_rsp) do { \
@@ -289,55 +286,7 @@ static void elf32_init(struct pt_regs *regs)
int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top,
int executable_stack)
{
- unsigned long stack_base;
- struct vm_area_struct *mpnt;
- struct mm_struct *mm = current->mm;
- int i, ret;
-
- stack_base = stack_top - MAX_ARG_PAGES * PAGE_SIZE;
- mm->arg_start = bprm->p + stack_base;
-
- bprm->p += stack_base;
- if (bprm->loader)
- bprm->loader += stack_base;
- bprm->exec += stack_base;
-
- mpnt = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
- if (!mpnt)
- return -ENOMEM;
-
- down_write(&mm->mmap_sem);
- {
- mpnt->vm_mm = mm;
- mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
- mpnt->vm_end = stack_top;
- if (executable_stack == EXSTACK_ENABLE_X)
- mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC;
- else if (executable_stack == EXSTACK_DISABLE_X)
- mpnt->vm_flags = VM_STACK_FLAGS & ~VM_EXEC;
- else
- mpnt->vm_flags = VM_STACK_FLAGS;
- mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC) ?
- PAGE_COPY_EXEC : PAGE_COPY;
- if ((ret = insert_vm_struct(mm, mpnt))) {
- up_write(&mm->mmap_sem);
- kmem_cache_free(vm_area_cachep, mpnt);
- return ret;
- }
- mm->stack_vm = mm->total_vm = vma_pages(mpnt);
- }
-
- for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
- struct page *page = bprm->page[i];
- if (page) {
- bprm->page[i] = NULL;
- install_arg_page(mpnt, page, stack_base);
- }
- stack_base += PAGE_SIZE;
- }
- up_write(&mm->mmap_sem);
-
- return 0;
+ return setup_arg_pages(bprm, stack_top, executable_stack);
}
EXPORT_SYMBOL(ia32_setup_arg_pages);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 9cc4f0a..fa0cf77 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -254,8 +254,8 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
size_t len;
if (__put_user((elf_addr_t)p, argv++))
return -EFAULT;
- len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
- if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
+ len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
+ if (!len || len > MAX_ARG_STRLEN)
return 0;
p += len;
}
@@ -266,8 +266,8 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
size_t len;
if (__put_user((elf_addr_t)p, envp++))
return -EFAULT;
- len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
- if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
+ len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
+ if (!len || len > MAX_ARG_STRLEN)
return 0;
p += len;
}
@@ -777,10 +777,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
}
/* OK, This is the point of no return */
- current->mm->start_data = 0;
- current->mm->end_data = 0;
- current->mm->end_code = 0;
- current->mm->mmap = NULL;
current->flags &= ~PF_FORKNOEXEC;
current->mm->def_flags = def_flags;
@@ -985,9 +981,13 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
compute_creds(bprm);
current->flags &= ~PF_FORKNOEXEC;
- create_elf_tables(bprm, &loc->elf_ex,
+ retval = create_elf_tables(bprm, &loc->elf_ex,
(interpreter_type == INTERPRETER_AOUT),
load_addr, interp_load_addr);
+ if (retval < 0) {
+ send_sig(SIGSEGV, current, 0);
+ goto out;
+ }
/* N.B. passed_fileno might not be initialized? */
if (interpreter_type == INTERPRETER_AOUT)
current->mm->arg_start += strlen(passed_fileno) + 1;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index f3ddca4..f3dc3ca 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -622,8 +622,8 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
p = (char __user *) current->mm->arg_start;
for (loop = bprm->argc; loop > 0; loop--) {
__put_user((elf_caddr_t) p, argv++);
- len = strnlen_user(p, PAGE_SIZE * MAX_ARG_PAGES);
- if (!len || len > PAGE_SIZE * MAX_ARG_PAGES)
+ len = strnlen_user(p, MAX_ARG_STRLEN);
+ if (!len || len > MAX_ARG_STRLEN)
return -EINVAL;
p += len;
}
@@ -634,8 +634,8 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
current->mm->env_start = (unsigned long) p;
for (loop = bprm->envc; loop > 0; loop--) {
__put_user((elf_caddr_t)(unsigned long) p, envp++);
- len = strnlen_user(p, PAGE_SIZE * MAX_ARG_PAGES);
- if (!len || len > PAGE_SIZE * MAX_ARG_PAGES)
+ len = strnlen_user(p, MAX_ARG_STRLEN);
+ if (!len || len > MAX_ARG_STRLEN)
return -EINVAL;
p += len;
}
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index e6f5799..aa9ae99 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -126,7 +126,9 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
goto _ret;
if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) {
- remove_arg_zero(bprm);
+ retval = remove_arg_zero(bprm);
+ if (retval)
+ goto _ret;
}
if (fmt->flags & MISC_FMT_OPEN_BINARY) {
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 1edbcca..5b6309f 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -68,7 +68,9 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
* This is done in reverse order, because of how the
* user environment and arguments are stored.
*/
- remove_arg_zero(bprm);
+ retval = remove_arg_zero(bprm);
+ if (retval)
+ return retval;
retval = copy_strings_kernel(1, &bprm->interp, bprm);
if (retval < 0) return retval;
bprm->argc++;
diff --git a/fs/compat.c b/fs/compat.c
index 72e5e69..5279745 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1391,6 +1391,7 @@ static int compat_copy_strings(int argc, compat_uptr_t __user *argv,
{
struct page *kmapped_page = NULL;
char *kaddr = NULL;
+ unsigned long kpos = 0;
int ret;
while (argc-- > 0) {
@@ -1399,92 +1400,84 @@ static int compat_copy_strings(int argc, compat_uptr_t __user *argv,
unsigned long pos;
if (get_user(str, argv+argc) ||
- !(len = strnlen_user(compat_ptr(str), bprm->p))) {
+ !(len = strnlen_user(compat_ptr(str), MAX_ARG_STRLEN))) {
ret = -EFAULT;
goto out;
}
- if (bprm->p < len) {
+ if (MAX_ARG_STRLEN < len) {
ret = -E2BIG;
goto out;
}
- bprm->p -= len;
- /* XXX: add architecture specific overflow check here. */
+ /* We're going to work our way backwords. */
pos = bprm->p;
+ str += len;
+ bprm->p -= len;
while (len > 0) {
- int i, new, err;
int offset, bytes_to_copy;
- struct page *page;
offset = pos % PAGE_SIZE;
- i = pos/PAGE_SIZE;
- page = bprm->page[i];
- new = 0;
- if (!page) {
- page = alloc_page(GFP_HIGHUSER);
- bprm->page[i] = page;
- if (!page) {
- ret = -ENOMEM;
+ if (offset == 0)
+ offset = PAGE_SIZE;
+
+ bytes_to_copy = offset;
+ if (bytes_to_copy > len)
+ bytes_to_copy = len;
+
+ offset -= bytes_to_copy;
+ pos -= bytes_to_copy;
+ str -= bytes_to_copy;
+ len -= bytes_to_copy;
+
+ if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
+ struct page *page;
+
+#ifdef CONFIG_STACK_GROWSUP
+ ret = expand_downwards(bprm->vma, pos);
+ if (ret < 0) {
+ /* We've exceed the stack rlimit. */
+ ret = -E2BIG;
+ goto out;
+ }
+#endif
+ ret = get_user_pages(current, bprm->mm, pos,
+ 1, 1, 1, &page, NULL);
+ if (ret <= 0) {
+ /* We've exceed the stack rlimit. */
+ ret = -E2BIG;
goto out;
}
- new = 1;
- }
- if (page != kmapped_page) {
- if (kmapped_page)
+ if (kmapped_page) {
+ flush_kernel_dcache_page(kmapped_page);
kunmap(kmapped_page);
+ put_page(kmapped_page);
+ }
kmapped_page = page;
kaddr = kmap(kmapped_page);
+ kpos = pos & PAGE_MASK;
+ flush_cache_page(bprm->vma, kpos,
+ page_to_pfn(kmapped_page));
}
- if (new && offset)
- memset(kaddr, 0, offset);
- bytes_to_copy = PAGE_SIZE - offset;
- if (bytes_to_copy > len) {
- bytes_to_copy = len;
- if (new)
- memset(kaddr+offset+len, 0,
- PAGE_SIZE-offset-len);
- }
- err = copy_from_user(kaddr+offset, compat_ptr(str),
- bytes_to_copy);
- if (err) {
+ if (copy_from_user(kaddr+offset, compat_ptr(str),
+ bytes_to_copy)) {
ret = -EFAULT;
goto out;
}
-
- pos += bytes_to_copy;
- str += bytes_to_copy;
- len -= bytes_to_copy;
}
}
ret = 0;
out:
- if (kmapped_page)
+ if (kmapped_page) {
+ flush_kernel_dcache_page(kmapped_page);
kunmap(kmapped_page);
- return ret;
-}
-
-#ifdef CONFIG_MMU
-
-#define free_arg_pages(bprm) do { } while (0)
-
-#else
-
-static inline void free_arg_pages(struct linux_binprm *bprm)
-{
- int i;
-
- for (i = 0; i < MAX_ARG_PAGES; i++) {
- if (bprm->page[i])
- __free_page(bprm->page[i]);
- bprm->page[i] = NULL;
+ put_page(kmapped_page);
}
+ return ret;
}
-#endif /* CONFIG_MMU */
-
/*
* compat_do_execve() is mostly a copy of do_execve(), with the exception
* that it processes 32 bit argv and envp pointers.
@@ -1497,7 +1490,6 @@ int compat_do_execve(char * filename,
struct linux_binprm *bprm;
struct file *file;
int retval;
- int i;
retval = -ENOMEM;
bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
@@ -1511,24 +1503,19 @@ int compat_do_execve(char * filename,
sched_exec();
- bprm->p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
bprm->file = file;
bprm->filename = filename;
bprm->interp = filename;
- bprm->mm = mm_alloc();
- retval = -ENOMEM;
- if (!bprm->mm)
- goto out_file;
- retval = init_new_context(current, bprm->mm);
- if (retval < 0)
- goto out_mm;
+ retval = bprm_mm_init(bprm);
+ if (retval)
+ goto out_file;
- bprm->argc = compat_count(argv, bprm->p / sizeof(compat_uptr_t));
+ bprm->argc = compat_count(argv, MAX_ARG_STRINGS);
if ((retval = bprm->argc) < 0)
goto out_mm;
- bprm->envc = compat_count(envp, bprm->p / sizeof(compat_uptr_t));
+ bprm->envc = compat_count(envp, MAX_ARG_STRINGS);
if ((retval = bprm->envc) < 0)
goto out_mm;
@@ -1553,10 +1540,8 @@ int compat_do_execve(char * filename,
if (retval < 0)
goto out;
- retval = search_binary_handler(bprm, regs);
+ retval = search_binary_handler(bprm,regs);
if (retval >= 0) {
- free_arg_pages(bprm);
-
/* execve success */
security_bprm_free(bprm);
acct_update_integrals(current);
@@ -1565,19 +1550,12 @@ int compat_do_execve(char * filename,
}
out:
- /* Something went wrong, return the inode and free the argument pages*/
- for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
- struct page * page = bprm->page[i];
- if (page)
- __free_page(page);
- }
-
if (bprm->security)
security_bprm_free(bprm);
out_mm:
if (bprm->mm)
- mmdrop(bprm->mm);
+ mmput (bprm->mm);
out_file:
if (bprm->file) {
diff --git a/fs/exec.c b/fs/exec.c
index 3155e91..8637630 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -53,6 +53,7 @@
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
+#include <asm/tlb.h>
#ifdef CONFIG_KMOD
#include <linux/kmod.h>
@@ -173,6 +174,153 @@ exit:
goto out;
}
+#ifdef CONFIG_MMU
+
+static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+ int write)
+{
+ struct page *page;
+ int ret;
+
+#ifdef CONFIG_STACK_GROWSUP
+ if (write) {
+ ret = expand_downwards(bprm->vma, pos);
+ if (ret < 0)
+ return NULL;
+ }
+#endif
+ ret = get_user_pages(current, bprm->mm, pos,
+ 1, write, 1, &page, NULL);
+ if (ret <= 0)
+ return NULL;
+
+ return page;
+}
+
+static void put_arg_page(struct page *page)
+{
+ put_page(page);
+}
+
+static void free_arg_page(struct linux_binprm *bprm, int i)
+{
+}
+
+static void free_arg_pages(struct linux_binprm *bprm)
+{
+}
+
+#else
+
+static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+ int write)
+{
+ struct page *page;
+
+ page = bprm->page[pos / PAGE_SIZE];
+ if (!page && write) {
+ page = alloc_page(GFP_HIGHUSER|__GFP_ZERO);
+ if (!page)
+ return NULL;
+ bprm->page[pos / PAGE_SIZE] = page;
+ }
+
+ return page;
+}
+
+static void put_arg_page(struct page *page)
+{
+}
+
+static void free_arg_page(struct linux_binprm *bprm, int i)
+{
+ if (bprm->page[i]) {
+ __free_page(bprm->page[i]);
+ bprm->page[i] = NULL;
+ }
+}
+
+static void free_arg_pages(struct linux_binprm *bprm)
+{
+ int i;
+
+ for (i = 0; i < MAX_ARG_PAGES; i++)
+ free_arg_page(bprm, i);
+}
+
+#endif /* CONFIG_MMU */
+
+/* Create a new mm_struct and populate it with a temporary stack
+ * vm_area_struct. We don't have enough context at this point to set the
+ * stack flags, permissions, and offset, so we use temporary values. We'll
+ * update them later in setup_arg_pages(). */
+int bprm_mm_init(struct linux_binprm *bprm)
+{
+ int err;
+ struct mm_struct *mm = NULL;
+ struct vm_area_struct *vma = NULL;
+
+ bprm->mm = mm = mm_alloc();
+ err = -ENOMEM;
+ if (!mm)
+ goto err;
+
+ if ((err = init_new_context(current, mm)))
+ goto err;
+
+#ifdef CONFIG_MMU
+ bprm->vma = vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+ err = -ENOMEM;
+ if (!vma)
+ goto err;
+
+ down_write(&mm->mmap_sem);
+ {
+ vma->vm_mm = mm;
+
+ /* Place the stack at the top of user memory. Later, we'll
+ * move this to an appropriate place. We don't use STACK_TOP
+ * because that can depend on attributes which aren't
+ * configured yet. */
+ vma->vm_end = TASK_SIZE;
+ vma->vm_start = vma->vm_end - PAGE_SIZE;
+
+ vma->vm_flags = VM_STACK_FLAGS;
+ vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
+ if ((err = insert_vm_struct(mm, vma))) {
+ up_write(&mm->mmap_sem);
+ goto err;
+ }
+
+ mm->stack_vm = mm->total_vm = 1;
+ }
+ up_write(&mm->mmap_sem);
+
+ bprm->p = vma->vm_end - sizeof(void *);
+#else
+ bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *);
+#endif
+
+ return 0;
+
+err:
+#ifdef CONFIG_MMU
+ if (vma) {
+ bprm->vma = NULL;
+ kmem_cache_free(vm_area_cachep, vma);
+ }
+#endif
+
+ if (mm) {
+ bprm->mm = NULL;
+ mmdrop(mm);
+ }
+
+ return err;
+}
+
+EXPORT_SYMBOL(bprm_mm_init);
+
/*
* count() counts the number of strings in array ARGV.
*/
@@ -198,15 +346,16 @@ static int count(char __user * __user * argv, int max)
}
/*
- * 'copy_strings()' copies argument/environment strings from user
- * memory to free pages in kernel mem. These are in a format ready
- * to be put directly into the top of new user memory.
+ * 'copy_strings()' copies argument/environment strings from the old
+ * processes's memory to the new process's stack. The call to get_user_pages()
+ * ensures the destination page is created and not swapped out.
*/
static int copy_strings(int argc, char __user * __user * argv,
struct linux_binprm *bprm)
{
struct page *kmapped_page = NULL;
char *kaddr = NULL;
+ unsigned long kpos = 0;
int ret;
while (argc-- > 0) {
@@ -215,69 +364,77 @@ static int copy_strings(int argc, char __user * __user * argv,
unsigned long pos;
if (get_user(str, argv+argc) ||
- !(len = strnlen_user(str, bprm->p))) {
+ !(len = strnlen_user(str, MAX_ARG_STRLEN))) {
ret = -EFAULT;
goto out;
}
- if (bprm->p < len) {
+#ifdef CONFIG_MMU
+ if (MAX_ARG_STRLEN < len) {
ret = -E2BIG;
goto out;
}
+#else
+ if (bprm->p < len) {
+ ret = -E2BIG;
+ goto out;
+ }
+#endif
- bprm->p -= len;
- /* XXX: add architecture specific overflow check here. */
+ /* We're going to work our way backwords. */
pos = bprm->p;
+ str += len;
+ bprm->p -= len;
while (len > 0) {
- int i, new, err;
int offset, bytes_to_copy;
- struct page *page;
offset = pos % PAGE_SIZE;
- i = pos/PAGE_SIZE;
- page = bprm->page[i];
- new = 0;
- if (!page) {
- page = alloc_page(GFP_HIGHUSER);
- bprm->page[i] = page;
+ if (offset == 0)
+ offset = PAGE_SIZE;
+
+ bytes_to_copy = offset;
+ if (bytes_to_copy > len)
+ bytes_to_copy = len;
+
+ offset -= bytes_to_copy;
+ pos -= bytes_to_copy;
+ str -= bytes_to_copy;
+ len -= bytes_to_copy;
+
+ if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
+ struct page *page;
+
+ page = get_arg_page(bprm, pos, 1);
if (!page) {
- ret = -ENOMEM;
+ ret = -E2BIG;
goto out;
}
- new = 1;
- }
- if (page != kmapped_page) {
- if (kmapped_page)
+ if (kmapped_page) {
+ flush_kernel_dcache_page(kmapped_page);
kunmap(kmapped_page);
+ put_arg_page(kmapped_page);
+ }
kmapped_page = page;
kaddr = kmap(kmapped_page);
+ kpos = pos & PAGE_MASK;
+ flush_cache_page(bprm->vma, kpos,
+ page_to_pfn(kmapped_page));
}
- if (new && offset)
- memset(kaddr, 0, offset);
- bytes_to_copy = PAGE_SIZE - offset;
- if (bytes_to_copy > len) {
- bytes_to_copy = len;
- if (new)
- memset(kaddr+offset+len, 0,
- PAGE_SIZE-offset-len);
- }
- err = copy_from_user(kaddr+offset, str, bytes_to_copy);
- if (err) {
+ if (copy_from_user(kaddr+offset, str, bytes_to_copy)) {
ret = -EFAULT;
goto out;
}
-
- pos += bytes_to_copy;
- str += bytes_to_copy;
- len -= bytes_to_copy;
}
}
ret = 0;
out:
- if (kmapped_page)
+ if (kmapped_page) {
+ flush_kernel_dcache_page(kmapped_page);
kunmap(kmapped_page);
+ put_arg_page(kmapped_page);
+ }
return ret;
}
@@ -297,154 +454,157 @@ int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm)
EXPORT_SYMBOL(copy_strings_kernel);
#ifdef CONFIG_MMU
-/*
- * This routine is used to map in a page into an address space: needed by
- * execve() for the initial stack and environment pages.
- *
- * vma->vm_mm->mmap_sem is held for writing.
- */
-void install_arg_page(struct vm_area_struct *vma,
- struct page *page, unsigned long address)
+
+static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
{
struct mm_struct *mm = vma->vm_mm;
- pte_t * pte;
- spinlock_t *ptl;
+ unsigned long old_start = vma->vm_start;
+ unsigned long old_end = vma->vm_end;
+ unsigned long length = old_end - old_start;
+ unsigned long new_start = old_start + shift;
+ unsigned long new_end = old_end + shift;
+ struct mmu_gather *tlb;
+
+ BUG_ON(new_start > new_end);
+
+ if (new_start < old_start) {
+ if (vma != find_vma(mm, new_start))
+ return -EFAULT;
+
+ vma_adjust(vma, new_start, old_end,
+ vma->vm_pgoff - (-shift >> PAGE_SHIFT), NULL);
+
+ if (length != move_page_tables(vma, old_start,
+ vma, new_start, length))
+ return -ENOMEM;
+
+ lru_add_drain();
+ tlb = tlb_gather_mmu(mm, 0);
+ free_pgd_range(&tlb, new_end, old_end, new_end,
+ vma->vm_next ? vma->vm_next->vm_start : 0);
+ tlb_finish_mmu(tlb, new_end, old_end);
+
+ vma_adjust(vma, new_start, new_end, vma->vm_pgoff, NULL);
+ } else {
+ struct vm_area_struct *tmp, *prev;
+
+ tmp = find_vma_prev(mm, new_end, &prev);
+ if ((tmp && tmp->vm_start < new_end) || prev != vma)
+ return -EFAULT;
- if (unlikely(anon_vma_prepare(vma)))
- goto out;
+ find_vma_prev(mm, vma->vm_start, &prev);
- flush_dcache_page(page);
- pte = get_locked_pte(mm, address, &ptl);
- if (!pte)
- goto out;
- if (!pte_none(*pte)) {
- pte_unmap_unlock(pte, ptl);
- goto out;
+ vma_adjust(vma, old_start, new_end, vma->vm_pgoff, NULL);
+
+ if (length != move_page_tables_up(vma, old_start,
+ vma, new_start, length))
+ return -ENOMEM;
+
+ lru_add_drain();
+ tlb = tlb_gather_mmu(mm, 0);
+ free_pgd_range(&tlb, old_start, new_start,
+ prev ? prev->vm_end: 0, new_start);
+ tlb_finish_mmu(tlb, old_start, new_start);
+
+ vma_adjust(vma, new_start, new_end,
+ vma->vm_pgoff + (shift >> PAGE_SHIFT), NULL);
}
- inc_mm_counter(mm, anon_rss);
- lru_cache_add_active(page);
- set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte(
- page, vma->vm_page_prot))));
- page_add_new_anon_rmap(page, vma, address);
- pte_unmap_unlock(pte, ptl);
-
- /* no need for flush_tlb */
- return;
-out:
- __free_page(page);
- force_sig(SIGKILL, current);
+
+ return 0;
}
#define EXTRA_STACK_VM_PAGES 20 /* random */
+/* Finalizes the stack vm_area_struct. The flags and permissions are updated,
+ * the stack is optionally relocated, and some extra space is added.
+ */
int setup_arg_pages(struct linux_binprm *bprm,
unsigned long stack_top,
int executable_stack)
{
- unsigned long stack_base;
- struct vm_area_struct *mpnt;
+ unsigned long ret;
+ unsigned long stack_base, stack_shift;
struct mm_struct *mm = current->mm;
- int i, ret;
- long arg_size;
+ struct vm_area_struct *vma = bprm->vma;
#ifdef CONFIG_STACK_GROWSUP
- /* Move the argument and environment strings to the bottom of the
- * stack space.
- */
- int offset, j;
- char *to, *from;
-
- /* Start by shifting all the pages down */
- i = 0;
- for (j = 0; j < MAX_ARG_PAGES; j++) {
- struct page *page = bprm->page[j];
- if (!page)
- continue;
- bprm->page[i++] = page;
- }
-
- /* Now move them within their pages */
- offset = bprm->p % PAGE_SIZE;
- to = kmap(bprm->page[0]);
- for (j = 1; j < i; j++) {
- memmove(to, to + offset, PAGE_SIZE - offset);
- from = kmap(bprm->page[j]);
- memcpy(to + PAGE_SIZE - offset, from, offset);
- kunmap(bprm->page[j - 1]);
- to = from;
- }
- memmove(to, to + offset, PAGE_SIZE - offset);
- kunmap(bprm->page[j - 1]);
-
/* Limit stack size to 1GB */
stack_base = current->signal->rlim[RLIMIT_STACK].rlim_max;
if (stack_base > (1 << 30))
stack_base = 1 << 30;
stack_base = PAGE_ALIGN(stack_top - stack_base);
- /* Adjust bprm->p to point to the end of the strings. */
- bprm->p = stack_base + PAGE_SIZE * i - offset;
-
- mm->arg_start = stack_base;
- arg_size = i << PAGE_SHIFT;
+ /* Make sure we didn't let the argument array grow too large. */
+ if (vma->vm_end - vma->vm_start > STACK_TOP - stack_base)
+ return -ENOMEM;
- /* zero pages that were copied above */
- while (i < MAX_ARG_PAGES)
- bprm->page[i++] = NULL;
+ stack_shift = stack_base - vma->vm_start;
+ mm->arg_start = bprm->p + stack_shift;
+ bprm->p = vma->vm_end + stack_shift;
#else
- stack_base = arch_align_stack(stack_top - MAX_ARG_PAGES*PAGE_SIZE);
+ BUG_ON(stack_top > STACK_TOP);
+ BUG_ON(stack_top & ~PAGE_MASK);
+
+ stack_base = arch_align_stack(stack_top - mm->stack_vm*PAGE_SIZE);
stack_base = PAGE_ALIGN(stack_base);
- bprm->p += stack_base;
+
+ /* Make sure we didn't let the argument array grow too large. */
+ if (stack_base > stack_top)
+ return -ENOMEM;
+
+ stack_shift = stack_base - (bprm->p & PAGE_MASK);
+ bprm->p += stack_shift;
mm->arg_start = bprm->p;
- arg_size = stack_top - (PAGE_MASK & (unsigned long) mm->arg_start);
#endif
- arg_size += EXTRA_STACK_VM_PAGES * PAGE_SIZE;
-
if (bprm->loader)
- bprm->loader += stack_base;
- bprm->exec += stack_base;
-
- mpnt = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
- if (!mpnt)
- return -ENOMEM;
+ bprm->loader += stack_shift;
+ bprm->exec += stack_shift;
down_write(&mm->mmap_sem);
{
- mpnt->vm_mm = mm;
-#ifdef CONFIG_STACK_GROWSUP
- mpnt->vm_start = stack_base;
- mpnt->vm_end = stack_base + arg_size;
-#else
- mpnt->vm_end = stack_top;
- mpnt->vm_start = mpnt->vm_end - arg_size;
-#endif
+ struct vm_area_struct *prev = NULL;
+ unsigned long vm_flags = vma->vm_flags;
+
/* Adjust stack execute permissions; explicitly enable
* for EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X
* and leave alone (arch default) otherwise. */
if (unlikely(executable_stack == EXSTACK_ENABLE_X))
- mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC;
+ vm_flags |= VM_EXEC;
else if (executable_stack == EXSTACK_DISABLE_X)
- mpnt->vm_flags = VM_STACK_FLAGS & ~VM_EXEC;
- else
- mpnt->vm_flags = VM_STACK_FLAGS;
- mpnt->vm_flags |= mm->def_flags;
- mpnt->vm_page_prot = protection_map[mpnt->vm_flags & 0x7];
- if ((ret = insert_vm_struct(mm, mpnt))) {
+ vm_flags &= ~VM_EXEC;
+ vm_flags |= mm->def_flags;
+
+ ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end,
+ vm_flags);
+ if (ret) {
up_write(&mm->mmap_sem);
- kmem_cache_free(vm_area_cachep, mpnt);
return ret;
}
- mm->stack_vm = mm->total_vm = vma_pages(mpnt);
- }
+ BUG_ON(prev != vma);
+
+ /* Move stack pages down in memory. */
+ if (stack_shift) {
+ ret = shift_arg_pages(vma, stack_shift);
+ if (ret) {
+ up_write(&mm->mmap_sem);
+ return ret;
+ }
+ }
- for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
- struct page *page = bprm->page[i];
- if (page) {
- bprm->page[i] = NULL;
- install_arg_page(mpnt, page, stack_base);
+#ifdef CONFIG_STACK_GROWSUP
+ if (expand_stack(vma, vma->vm_end +
+ EXTRA_STACK_VM_PAGES * PAGE_SIZE)) {
+ up_write(&mm->mmap_sem);
+ return -EFAULT;
+ }
+#else
+ if (expand_stack(vma, stack_base -
+ EXTRA_STACK_VM_PAGES * PAGE_SIZE)) {
+ up_write(&mm->mmap_sem);
+ return -EFAULT;
}
- stack_base += PAGE_SIZE;
+#endif
}
up_write(&mm->mmap_sem);
@@ -453,21 +613,6 @@ int setup_arg_pages(struct linux_binprm *bprm,
EXPORT_SYMBOL(setup_arg_pages);
-#define free_arg_pages(bprm) do { } while (0)
-
-#else
-
-static inline void free_arg_pages(struct linux_binprm *bprm)
-{
- int i;
-
- for (i = 0; i < MAX_ARG_PAGES; i++) {
- if (bprm->page[i])
- __free_page(bprm->page[i]);
- bprm->page[i] = NULL;
- }
-}
-
#endif /* CONFIG_MMU */
struct file *open_exec(const char *name)
@@ -985,28 +1130,47 @@ void compute_creds(struct linux_binprm *bprm)
EXPORT_SYMBOL(compute_creds);
-void remove_arg_zero(struct linux_binprm *bprm)
+/*
+ * Arguments are '\0' separated strings found at the location bprm->p
+ * points to; chop off the first by relocating brpm->p to right after
+ * the first '\0' encountered.
+ */
+int remove_arg_zero(struct linux_binprm *bprm)
{
- if (bprm->argc) {
- unsigned long offset;
- char * kaddr;
- struct page *page;
+ int ret = 0;
+ unsigned long offset;
+ char *kaddr;
+ struct page *page;
- offset = bprm->p % PAGE_SIZE;
- goto inside;
+ if (!bprm->argc)
+ return 0;
- while (bprm->p++, *(kaddr+offset++)) {
- if (offset != PAGE_SIZE)
- continue;
- offset = 0;
- kunmap_atomic(kaddr, KM_USER0);
-inside:
- page = bprm->page[bprm->p/PAGE_SIZE];
- kaddr = kmap_atomic(page, KM_USER0);
+ do {
+ offset = bprm->p & ~PAGE_MASK;
+ page = get_arg_page(bprm, bprm->p, 0);
+ if (!page) {
+ ret = -EFAULT;
+ goto out;
}
+ kaddr = kmap_atomic(page, KM_USER0);
+
+ for (; offset < PAGE_SIZE && kaddr[offset];
+ offset++, bprm->p++)
+ ;
+
kunmap_atomic(kaddr, KM_USER0);
- bprm->argc--;
- }
+ put_arg_page(page);
+
+ if (offset == PAGE_SIZE)
+ free_arg_page(bprm, (bprm->p >> PAGE_SHIFT) - 1);
+ } while (offset == PAGE_SIZE);
+
+ bprm->p++;
+ bprm->argc--;
+ ret = 0;
+
+out:
+ return ret;
}
EXPORT_SYMBOL(remove_arg_zero);
@@ -1033,7 +1197,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
fput(bprm->file);
bprm->file = NULL;
- loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
+ loader = bprm->vma->vm_end - sizeof(void *);
file = open_exec("/sbin/loader");
retval = PTR_ERR(file);
@@ -1125,8 +1289,8 @@ int do_execve(char * filename,
{
struct linux_binprm *bprm;
struct file *file;
+ unsigned long tmp;
int retval;
- int i;
retval = -ENOMEM;
bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
@@ -1140,25 +1304,19 @@ int do_execve(char * filename,
sched_exec();
- bprm->p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
-
bprm->file = file;
bprm->filename = filename;
bprm->interp = filename;
- bprm->mm = mm_alloc();
- retval = -ENOMEM;
- if (!bprm->mm)
- goto out_file;
- retval = init_new_context(current, bprm->mm);
- if (retval < 0)
- goto out_mm;
+ retval = bprm_mm_init(bprm);
+ if (retval)
+ goto out_file;
- bprm->argc = count(argv, bprm->p / sizeof(void *));
+ bprm->argc = count(argv, MAX_ARG_STRINGS);
if ((retval = bprm->argc) < 0)
goto out_mm;
- bprm->envc = count(envp, bprm->p / sizeof(void *));
+ bprm->envc = count(envp, MAX_ARG_STRINGS);
if ((retval = bprm->envc) < 0)
goto out_mm;
@@ -1179,15 +1337,16 @@ int do_execve(char * filename,
if (retval < 0)
goto out;
+ tmp = bprm->p;
retval = copy_strings(bprm->argc, argv, bprm);
if (retval < 0)
goto out;
+ bprm->argv_len = tmp - bprm->p;
retval = search_binary_handler(bprm,regs);
if (retval >= 0) {
- free_arg_pages(bprm);
-
/* execve success */
+ free_arg_pages(bprm);
security_bprm_free(bprm);
acct_update_integrals(current);
kfree(bprm);
@@ -1195,26 +1354,19 @@ int do_execve(char * filename,
}
out:
- /* Something went wrong, return the inode and free the argument pages*/
- for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
- struct page * page = bprm->page[i];
- if (page)
- __free_page(page);
- }
-
+ free_arg_pages(bprm);
if (bprm->security)
security_bprm_free(bprm);
out_mm:
if (bprm->mm)
- mmdrop(bprm->mm);
+ mmput (bprm->mm);
out_file:
if (bprm->file) {
allow_write_access(bprm->file);
fput(bprm->file);
}
-
out_kfree:
kfree(bprm);
diff --git a/include/asm-um/processor-i386.h b/include/asm-um/processor-i386.h
index 595f1c3..869c236 100644
--- a/include/asm-um/processor-i386.h
+++ b/include/asm-um/processor-i386.h
@@ -67,9 +67,6 @@ static inline void rep_nop(void)
#define current_text_addr() \
({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; })
-#define ARCH_IS_STACKGROW(address) \
- (address + 32 >= UPT_SP(¤t->thread.regs.regs))
-
#define KSTK_EIP(tsk) KSTK_REG(tsk, EIP)
#define KSTK_ESP(tsk) KSTK_REG(tsk, UESP)
#define KSTK_EBP(tsk) KSTK_REG(tsk, EBP)
diff --git a/include/asm-um/processor-x86_64.h b/include/asm-um/processor-x86_64.h
index 31c2d4d..64d7bdd 100644
--- a/include/asm-um/processor-x86_64.h
+++ b/include/asm-um/processor-x86_64.h
@@ -44,9 +44,6 @@ static inline void arch_copy_thread(struct arch_thread *from,
#define current_text_addr() \
({ void *pc; __asm__("movq $1f,%0\n1:":"=g" (pc)); pc; })
-#define ARCH_IS_STACKGROW(address) \
- (address + 128 >= UPT_SP(¤t->thread.regs.regs))
-
#define KSTK_EIP(tsk) KSTK_REG(tsk, RIP)
#define KSTK_ESP(tsk) KSTK_REG(tsk, RSP)
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 2d956cd..8ac2277 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -5,12 +5,9 @@
struct pt_regs;
-/*
- * MAX_ARG_PAGES defines the number of pages allocated for arguments
- * and envelope for the new program. 32 should suffice, this gives
- * a maximum env+arg of 128kB w/4KB pages!
- */
-#define MAX_ARG_PAGES 32
+/* FIXME: Find real limits, or none. */
+#define MAX_ARG_STRLEN (PAGE_SIZE * 32)
+#define MAX_ARG_STRINGS 0x7FFFFFFF
/* sizeof(linux_binprm->buf) */
#define BINPRM_BUF_SIZE 128
@@ -22,7 +19,12 @@ struct pt_regs;
*/
struct linux_binprm{
char buf[BINPRM_BUF_SIZE];
+#ifdef CONFIG_MMU
+ struct vm_area_struct *vma;
+#else
+# define MAX_ARG_PAGES 32
struct page *page[MAX_ARG_PAGES];
+#endif
struct mm_struct *mm;
unsigned long p; /* current top of mem */
int sh_bang;
@@ -38,6 +40,7 @@ struct linux_binprm{
unsigned interp_flags;
unsigned interp_data;
unsigned long loader, exec;
+ unsigned long argv_len;
};
#define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0
@@ -66,7 +69,7 @@ extern int register_binfmt(struct linux_binfmt *);
extern int unregister_binfmt(struct linux_binfmt *);
extern int prepare_binprm(struct linux_binprm *);
-extern void remove_arg_zero(struct linux_binprm *);
+extern int __must_check remove_arg_zero(struct linux_binprm *);
extern int search_binary_handler(struct linux_binprm *,struct pt_regs *);
extern int flush_old_exec(struct linux_binprm * bprm);
@@ -83,6 +86,7 @@ extern int suid_dumpable;
extern int setup_arg_pages(struct linux_binprm * bprm,
unsigned long stack_top,
int executable_stack);
+extern int bprm_mm_init(struct linux_binprm *bprm);
extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm);
extern void compute_creds(struct linux_binprm *binprm);
extern int do_coredump(long signr, int exit_code, struct pt_regs * regs);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 60e0e4a..ceee062 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -771,7 +771,6 @@ static inline int handle_mm_fault(struct mm_struct *mm,
extern int make_pages_present(unsigned long addr, unsigned long end);
extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
-void install_arg_page(struct vm_area_struct *, struct page *, unsigned long);
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
@@ -788,9 +787,18 @@ int FASTCALL(set_page_dirty(struct page *page));
int set_page_dirty_lock(struct page *page);
int clear_page_dirty_for_io(struct page *page);
+extern unsigned long move_page_tables(struct vm_area_struct *vma,
+ unsigned long old_addr, struct vm_area_struct *new_vma,
+ unsigned long new_addr, unsigned long len);
+extern unsigned long move_page_tables_up(struct vm_area_struct *vma,
+ unsigned long old_addr, struct vm_area_struct *new_vma,
+ unsigned long new_addr, unsigned long len);
extern unsigned long do_mremap(unsigned long addr,
unsigned long old_len, unsigned long new_len,
unsigned long flags, unsigned long new_addr);
+extern int mprotect_fixup(struct vm_area_struct *vma,
+ struct vm_area_struct **pprev, unsigned long start,
+ unsigned long end, unsigned long newflags);
/*
* Prototype to add a shrinker callback for ageable caches.
@@ -1091,6 +1099,9 @@ extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
#ifdef CONFIG_IA64
extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
#endif
+#ifdef CONFIG_STACK_GROWSUP
+extern int expand_downwards(struct vm_area_struct *vma, unsigned long address);
+#endif
/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 47f1c53..b97ca11 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -165,6 +165,7 @@ enum
KERN_MAX_LOCK_DEPTH=74,
KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */
KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */
+ KERN_AUDIT_ARGV=77, /* int: max size of argv array for audit logging */
};
diff --git a/kernel/audit.c b/kernel/audit.c
index 4e9d208..9b08f55 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -170,6 +170,22 @@ void audit_panic(const char *message)
}
}
+void audit_kill(const char *message)
+{
+ switch (audit_failure)
+ {
+ case AUDIT_FAIL_SILENT:
+ break;
+ case AUDIT_FAIL_PRINTK:
+ printk(KERN_ERR "audit: %s\n", message);
+ break;
+ case AUDIT_FAIL_PANIC:
+ printk(KERN_ERR "audit: %s\n", message);
+ send_sig(SIGKILL, current, 0);
+ break;
+ }
+}
+
static inline int audit_rate_check(void)
{
static unsigned long last_check = 0;
diff --git a/kernel/audit.h b/kernel/audit.h
index a337023..9cad5ce 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -116,6 +116,7 @@ extern void audit_send_reply(int pid, int seq, int type,
void *payload, int size);
extern void audit_log_lost(const char *message);
extern void audit_panic(const char *message);
+extern void audit_kill(const char *message);
struct audit_netlink_list {
int pid;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 628c7ac..402252e 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -155,7 +155,7 @@ struct audit_aux_data_execve {
struct audit_aux_data d;
int argc;
int envc;
- char mem[0];
+ struct mm_struct *mm;
};
struct audit_aux_data_socketcall {
@@ -795,6 +795,48 @@ static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk
audit_log_task_context(ab);
}
+static void audit_log_execve_info(struct audit_buffer *ab,
+ struct audit_aux_data_execve *axi)
+{
+ int i;
+ long len;
+ const char __user *p = (const char __user *)axi->mm->arg_start;
+
+ if (axi->mm != current->mm)
+ return; /* execve failed, no additional info */
+
+ for (i = 0; i < axi->argc; i++, p += len) {
+ long ret;
+ char *tmp;
+
+ len = strnlen_user(p, MAX_ARG_STRLEN);
+ /*
+ * We just created this mm, if we can't find the strings
+ * we just copied in something is _very_ wrong.
+ */
+ BUG_ON(!len);
+
+ tmp = kmalloc(len, GFP_KERNEL);
+ if (!tmp) {
+ audit_kill("out of memory for argv string,"
+ " terminating process\n");
+ break;
+ }
+
+ ret = copy_from_user(tmp, p, len);
+ /*
+ * There is no reason for this copy to be short.
+ */
+ BUG_ON(ret);
+
+ audit_log_format(ab, "a%d=", i);
+ audit_log_untrustedstring(ab, tmp);
+ audit_log_format(ab, "\n");
+
+ kfree(tmp);
+ }
+}
+
static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
{
int i, call_panic = 0;
@@ -935,13 +977,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
case AUDIT_EXECVE: {
struct audit_aux_data_execve *axi = (void *)aux;
- int i;
- const char *p;
- for (i = 0, p = axi->mem; i < axi->argc; i++) {
- audit_log_format(ab, "a%d=", i);
- p = audit_log_untrustedstring(ab, p);
- audit_log_format(ab, "\n");
- }
+ audit_log_execve_info(ab, axi);
break; }
case AUDIT_SOCKETCALL: {
@@ -1761,32 +1797,31 @@ int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode
return 0;
}
+int audit_argv_kb = 32;
+
int audit_bprm(struct linux_binprm *bprm)
{
struct audit_aux_data_execve *ax;
struct audit_context *context = current->audit_context;
- unsigned long p, next;
- void *to;
if (likely(!audit_enabled || !context || context->dummy))
return 0;
- ax = kmalloc(sizeof(*ax) + PAGE_SIZE * MAX_ARG_PAGES - bprm->p,
- GFP_KERNEL);
+ /*
+ * Even though the stack code doesn't limit the arg+env size any more,
+ * the audit code requires that _all_ arguments be logged in a single
+ * netlink skb. Hence cap it :-(
+ */
+ if (bprm->argv_len > (audit_argv_kb << 10))
+ return -E2BIG;
+
+ ax = kmalloc(sizeof(*ax), GFP_KERNEL);
if (!ax)
return -ENOMEM;
ax->argc = bprm->argc;
ax->envc = bprm->envc;
- for (p = bprm->p, to = ax->mem; p < MAX_ARG_PAGES*PAGE_SIZE; p = next) {
- struct page *page = bprm->page[p / PAGE_SIZE];
- void *kaddr = kmap(page);
- next = (p + PAGE_SIZE) & ~(PAGE_SIZE - 1);
- memcpy(to, kaddr + (p & (PAGE_SIZE - 1)), next - p);
- to += next - p;
- kunmap(page);
- }
-
+ ax->mm = bprm->mm;
ax->d.type = AUDIT_EXECVE;
ax->d.next = context->aux;
context->aux = (void *)ax;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c904748..5dfd4d7 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -76,6 +76,7 @@ extern int pid_max_min, pid_max_max;
extern int sysctl_drop_caches;
extern int percpu_pagelist_fraction;
extern int compat_log;
+extern int audit_argv_kb;
/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
static int maxolduid = 65535;
@@ -603,6 +604,16 @@ static ctl_table kern_table[] = {
.proc_handler = &proc_dointvec,
},
#endif
+#ifdef CONFIG_AUDITSYSCALL
+ {
+ .ctl_name = KERN_AUDIT_ARGV,
+ .procname = "audit_argv_kb",
+ .data = &audit_argv_kb,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+#endif
{ .ctl_name = 0 }
};
diff --git a/mm/mmap.c b/mm/mmap.c
index 88da687..8c6ceb5 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1569,33 +1569,13 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
}
#endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
-#ifdef CONFIG_STACK_GROWSUP
-int expand_stack(struct vm_area_struct *vma, unsigned long address)
-{
- return expand_upwards(vma, address);
-}
-
-struct vm_area_struct *
-find_extend_vma(struct mm_struct *mm, unsigned long addr)
-{
- struct vm_area_struct *vma, *prev;
-
- addr &= PAGE_MASK;
- vma = find_vma_prev(mm, addr, &prev);
- if (vma && (vma->vm_start <= addr))
- return vma;
- if (!prev || expand_stack(prev, addr))
- return NULL;
- if (prev->vm_flags & VM_LOCKED) {
- make_pages_present(addr, prev->vm_end);
- }
- return prev;
-}
-#else
/*
* vma is the first one with address < vma->vm_start. Have to extend vma.
*/
-int expand_stack(struct vm_area_struct *vma, unsigned long address)
+#ifndef CONFIG_STACK_GROWSUP
+static inline
+#endif
+int expand_downwards(struct vm_area_struct *vma, unsigned long address)
{
int error;
@@ -1632,6 +1612,34 @@ int expand_stack(struct vm_area_struct *vma, unsigned long address)
return error;
}
+#ifdef CONFIG_STACK_GROWSUP
+int expand_stack(struct vm_area_struct *vma, unsigned long address)
+{
+ return expand_upwards(vma, address);
+}
+
+struct vm_area_struct *
+find_extend_vma(struct mm_struct *mm, unsigned long addr)
+{
+ struct vm_area_struct *vma, *prev;
+
+ addr &= PAGE_MASK;
+ vma = find_vma_prev(mm, addr, &prev);
+ if (vma && (vma->vm_start <= addr))
+ return vma;
+ if (!prev || expand_stack(prev, addr))
+ return NULL;
+ if (prev->vm_flags & VM_LOCKED) {
+ make_pages_present(addr, prev->vm_end);
+ }
+ return prev;
+}
+#else
+int expand_stack(struct vm_area_struct *vma, unsigned long address)
+{
+ return expand_downwards(vma, address);
+}
+
struct vm_area_struct *
find_extend_vma(struct mm_struct * mm, unsigned long addr)
{
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 3b8f3c0..e8346c3 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -128,7 +128,7 @@ static void change_protection(struct vm_area_struct *vma,
flush_tlb_range(vma, start, end);
}
-static int
+int
mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
unsigned long start, unsigned long end, unsigned long newflags)
{
diff --git a/mm/mremap.c b/mm/mremap.c
index 5d4bd4f..858a36b 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -118,9 +118,63 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
spin_unlock(&mapping->i_mmap_lock);
}
+static void move_ptes_up(struct vm_area_struct *vma, pmd_t *old_pmd,
+ unsigned long old_addr, unsigned long old_end,
+ struct vm_area_struct *new_vma, pmd_t *new_pmd,
+ unsigned long new_addr)
+{
+ struct address_space *mapping = NULL;
+ struct mm_struct *mm = vma->vm_mm;
+ pte_t *old_pte, *new_pte, pte;
+ spinlock_t *old_ptl, *new_ptl;
+ unsigned long new_end = new_addr + (old_end - old_addr);
+
+ if (vma->vm_file) {
+ /*
+ * Subtle point from Rajesh Venkatasubramanian: before
+ * moving file-based ptes, we must lock vmtruncate out,
+ * since it might clean the dst vma before the src vma,
+ * and we propagate stale pages into the dst afterward.
+ */
+ mapping = vma->vm_file->f_mapping;
+ spin_lock(&mapping->i_mmap_lock);
+ if (new_vma->vm_truncate_count &&
+ new_vma->vm_truncate_count != vma->vm_truncate_count)
+ new_vma->vm_truncate_count = 0;
+ }
+
+ /*
+ * We don't have to worry about the ordering of src and dst
+ * pte locks because exclusive mmap_sem prevents deadlock.
+ */
+ old_pte = pte_offset_map_lock(mm, old_pmd, old_end-1, &old_ptl);
+ new_pte = pte_offset_map_nested(new_pmd, new_end-1);
+ new_ptl = pte_lockptr(mm, new_pmd);
+ if (new_ptl != old_ptl)
+ spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+ arch_enter_lazy_mmu_mode();
+
+ for (; old_end > old_addr; old_pte--, old_end -= PAGE_SIZE,
+ new_pte--, new_end -= PAGE_SIZE) {
+ if (pte_none(*old_pte))
+ continue;
+ pte = ptep_clear_flush(vma, old_end-1, old_pte);
+ pte = move_pte(pte, new_vma->vm_page_prot, old_end-1, new_end-1);
+ set_pte_at(mm, new_end-1, new_pte, pte);
+ }
+
+ arch_leave_lazy_mmu_mode();
+ if (new_ptl != old_ptl)
+ spin_unlock(new_ptl);
+ pte_unmap_nested(new_pte - 1);
+ pte_unmap_unlock(old_pte - 1, old_ptl);
+ if (mapping)
+ spin_unlock(&mapping->i_mmap_lock);
+}
+
#define LATENCY_LIMIT (64 * PAGE_SIZE)
-static unsigned long move_page_tables(struct vm_area_struct *vma,
+unsigned long move_page_tables(struct vm_area_struct *vma,
unsigned long old_addr, struct vm_area_struct *new_vma,
unsigned long new_addr, unsigned long len)
{
@@ -132,21 +186,25 @@ static unsigned long move_page_tables(struct vm_area_struct *vma,
for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
cond_resched();
+
next = (old_addr + PMD_SIZE) & PMD_MASK;
if (next - 1 > old_end)
next = old_end;
extent = next - old_addr;
+
old_pmd = get_old_pmd(vma->vm_mm, old_addr);
if (!old_pmd)
continue;
new_pmd = alloc_new_pmd(vma->vm_mm, new_addr);
if (!new_pmd)
break;
+
next = (new_addr + PMD_SIZE) & PMD_MASK;
if (extent > next - new_addr)
extent = next - new_addr;
if (extent > LATENCY_LIMIT)
extent = LATENCY_LIMIT;
+
move_ptes(vma, old_pmd, old_addr, old_addr + extent,
new_vma, new_pmd, new_addr);
}
@@ -154,6 +212,51 @@ static unsigned long move_page_tables(struct vm_area_struct *vma,
return len + old_addr - old_end; /* how much done */
}
+unsigned long move_page_tables_up(struct vm_area_struct *vma,
+ unsigned long old_addr, struct vm_area_struct *new_vma,
+ unsigned long new_addr, unsigned long len)
+{
+ unsigned long extent, prev, old_end, new_end;
+ pmd_t *old_pmd, *new_pmd;
+
+ old_end = old_addr + len;
+ new_end = new_addr + len;
+ flush_cache_range(vma, old_addr, old_end);
+
+ for (; old_end > old_addr; old_end -= extent, new_end -= extent) {
+ cond_resched();
+
+ /*
+ * calculate how far till prev PMD boundary for old
+ */
+ prev = (old_end - 1) & PMD_MASK;
+ if (prev < old_addr)
+ prev = old_addr;
+ extent = old_end - prev;
+
+ old_pmd = get_old_pmd(vma->vm_mm, old_end-1);
+ if (!old_pmd)
+ continue;
+ new_pmd = alloc_new_pmd(vma->vm_mm, new_end-1);
+ if (!new_pmd)
+ break;
+
+ /*
+ * calculate and clip to prev PMD boundary for new
+ */
+ prev = (new_end - 1) & PMD_MASK;
+ if (extent > new_end - prev)
+ extent = new_end - prev;
+ if (extent > LATENCY_LIMIT)
+ extent = LATENCY_LIMIT;
+
+ move_ptes_up(vma, old_pmd, old_end - extent, old_end,
+ new_vma, new_pmd, new_end - extent);
+ }
+
+ return old_addr + len - old_end;
+}
+
static unsigned long move_vma(struct vm_area_struct *vma,
unsigned long old_addr, unsigned long old_len,
unsigned long new_len, unsigned long new_addr)
next reply other threads:[~2007-05-06 8:51 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-05-06 8:51 Ollie Wild [this message]
2007-05-07 17:46 ` Luck, Tony
2007-05-07 19:01 ` William Lee Irwin III
2007-05-22 12:24 ` Peter Zijlstra
2007-05-22 23:49 ` Luck, Tony
2007-05-24 15:20 ` Peter Zijlstra
2007-05-25 18:48 ` Luck, Tony
2007-05-09 20:48 ` Andrew Morton
2007-05-10 1:04 ` Rob Landley
2007-05-10 4:06 ` Ollie Wild
2007-05-10 9:19 ` Rob Landley
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=65dd6fd50705060151m78bb9b4fpcb941b16a8c4709e@mail.gmail.com \
--to=aaw@google.com \
--cc=a.p.zijlstra@chello.nl \
--cc=ak@suse.de \
--cc=akpm@osdl.org \
--cc=linux-arch@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mingo@elte.hu \
--cc=parisc-linux@lists.parisc-linux.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox