linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Eric B Munson <ebmunson@us.ibm.com>
To: linux-mm@kvack.org
Cc: nacc <nacc@linux.vnet.ibm.com>,
	mel@csn.ul.ie, andyw <andyw@linux.vnet.ibm.com>
Subject: [RFC][PATCH 2/2] Add huge page backed stack support
Date: Thu, 01 May 2008 18:51:49 -0700	[thread overview]
Message-ID: <1209693109.8483.23.camel@grover.beaverton.ibm.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 6364 bytes --]

This patch allows a process's stack to be backed by huge pages on request. As
the stack is set up at exec() time, a personality flag is added to indicate
the use of a hugepage-backed stack. The personality flag is inherited across
exec().

Huge page stacks require stack randomization to be disabled because huge
ptes are not movable, so the HUGE_PAGE_STACK personality flag implies
ADDR_NO_RANDOMIZE.  When the hugetlb file is set up to back the stack, it is
sized to fit the ulimit for stack size, or 256 MB if the ulimit is unlimited.
The GROWSUP and GROWSDOWN VM flags are turned off because a hugetlb-backed
vma is not resizable, so it will be appropriately sized when created.  When
a process exceeds its stack size it receives a segfault exactly as it would
if it exceeded the ulimit.

Based on 2.6.25

Signed-off-by: Eric Munson <ebmunson@us.ibm.com>

---

 fs/exec.c                   |   87 ++++++++++++++++++++++++++++++++++++++----
 include/linux/personality.h |    3 +
 2 files changed, 81 insertions(+), 9 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index b152029..d38ddf0 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -51,6 +51,7 @@
 #include <linux/tsacct_kern.h>
 #include <linux/cn_proc.h>
 #include <linux/audit.h>
+#include <linux/hugetlb.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -60,6 +61,8 @@
 #include <linux/kmod.h>
 #endif
 
+#define MB (1024*1024)
+
 int core_uses_pid;
 char core_pattern[CORENAME_MAX_SIZE] = "core";
 int suid_dumpable = 0;
@@ -152,6 +155,13 @@ exit:
 	goto out;
 }
 
+static unsigned long personality_page_align(unsigned long addr)
+{
+	if (get_personality & HUGE_PAGE_STACK)
+		return HPAGE_ALIGN(addr);
+	return PAGE_ALIGN(addr);
+}
+
 #ifdef CONFIG_MMU
 
 static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
@@ -173,7 +183,12 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 		return NULL;
 
 	if (write) {
-		unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
+		/*
+		 * Args are always placed at the high end of the stack space
+		 * so this calculation will give the proper size and it is
+		 * compatible with huge page stacks.
+		 */
+		unsigned long size = bprm->vma->vm_end - pos;
 		struct rlimit *rlim;
 
 		/*
@@ -219,16 +234,57 @@ static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
 	flush_cache_page(bprm->vma, pos, page_to_pfn(page));
 }
 
+static struct file *hugetlb_stack_file(int stack_hpages)
+{
+	struct file *hugefile = NULL;
+
+	if (!stack_hpages) {
+		set_personality(get_personality & (~HUGE_PAGE_STACK));
+		printk(KERN_DEBUG
+			"Stack rlimit set too low for huge page backed stack.\n");
+		return NULL;
+	}
+
+	hugefile = hugetlb_file_setup(HUGETLB_STACK_FILE,
+					HPAGE_SIZE * stack_hpages, 0);
+	if (unlikely(IS_ERR_VALUE(hugefile))) {
+		/*
+		 * If huge pages are not available for this stack,
+		 * fall back to normal pages for execution instead of
+		 * failing.
+		 */
+		printk(KERN_DEBUG
+			"Huge page backed stack unavailable for process %lu.\n",
+			(unsigned long)current->pid);
+		set_personality(get_personality & (~HUGE_PAGE_STACK));
+		return NULL;
+	}
+	return hugefile;
+}
+
 static int __bprm_mm_init(struct linux_binprm *bprm)
 {
 	int err = -ENOMEM;
 	struct vm_area_struct *vma = NULL;
 	struct mm_struct *mm = bprm->mm;
+	struct file *hugefile = NULL;
+	struct rlimit *rlim;
+	int stack_hpages = 0;
 
 	bprm->vma = vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 	if (!vma)
 		goto err;
 
+	if (get_personality & HUGE_PAGE_STACK) {
+		rlim = current->signal->rlim;
+		if (rlim[RLIMIT_STACK].rlim_cur == _STK_LIM_MAX)
+			stack_hpages = (256 * MB) / HPAGE_SIZE;
+		else
+			stack_hpages = rlim[RLIMIT_STACK].rlim_cur / HPAGE_SIZE;
+
+		hugefile = hugetlb_stack_file(stack_hpages);
+	}
+
 	down_write(&mm->mmap_sem);
 	vma->vm_mm = mm;
 
@@ -239,9 +295,20 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
 	 * configured yet.
 	 */
 	vma->vm_end = STACK_TOP_MAX;
-	vma->vm_start = vma->vm_end - PAGE_SIZE;
 
 	vma->vm_flags = VM_STACK_FLAGS;
+
+	if (hugefile) {
+		vma->vm_flags &= ~(VM_GROWSUP|VM_GROWSDOWN);
+		vma->vm_file = hugefile;
+		vma->vm_flags |= VM_HUGETLB;
+		/* Stack randomization is not supported on huge pages */
+		set_personality(get_personality | ADDR_NO_RANDOMIZE);
+		vma->vm_start = vma->vm_end - (HPAGE_SIZE * stack_hpages);
+	} else {
+		vma->vm_start = vma->vm_end - PAGE_SIZE;
+	}
+
 	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 	err = insert_vm_struct(mm, vma);
 	if (err) {
@@ -593,13 +660,12 @@ int setup_arg_pages(struct linux_binprm *bprm,
 	bprm->p = vma->vm_end - stack_shift;
 #else
 	stack_top = arch_align_stack(stack_top);
-	stack_top = PAGE_ALIGN(stack_top);
+	stack_top = personality_page_align(stack_top);
 	stack_shift = vma->vm_end - stack_top;
 
 	bprm->p -= stack_shift;
 	mm->arg_start = bprm->p;
 #endif
-
 	if (bprm->loader)
 		bprm->loader -= stack_shift;
 	bprm->exec -= stack_shift;
@@ -633,14 +699,17 @@ int setup_arg_pages(struct linux_binprm *bprm,
 		}
 	}
 
+	if (!(get_personality & HUGE_PAGE_STACK)) {
 #ifdef CONFIG_STACK_GROWSUP
-	stack_base = vma->vm_end + EXTRA_STACK_VM_PAGES * PAGE_SIZE;
+		stack_base = vma->vm_end + EXTRA_STACK_VM_PAGES * PAGE_SIZE;
 #else
-	stack_base = vma->vm_start - EXTRA_STACK_VM_PAGES * PAGE_SIZE;
+		stack_base = vma->vm_start - EXTRA_STACK_VM_PAGES * PAGE_SIZE;
 #endif
-	ret = expand_stack(vma, stack_base);
-	if (ret)
-		ret = -EFAULT;
+
+		ret = expand_stack(vma, stack_base);
+		if (ret)
+			ret = -EFAULT;
+	}
 
 out_unlock:
 	up_write(&mm->mmap_sem);
diff --git a/include/linux/personality.h b/include/linux/personality.h
index 012cd55..6ecebdf 100644
--- a/include/linux/personality.h
+++ b/include/linux/personality.h
@@ -22,6 +22,9 @@ extern int		__set_personality(unsigned long);
  * These occupy the top three bytes.
  */
 enum {
+	HUGE_PAGE_STACK = 	0x0020000,	/* Attempt to use a huge page
+						 * for the process stack
+						 */
 	ADDR_NO_RANDOMIZE = 	0x0040000,	/* disable randomization of VA space */
 	FDPIC_FUNCPTRS =	0x0080000,	/* userspace function ptrs point to descriptors
 						 * (signal handling)


[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

             reply	other threads:[~2008-05-02  1:51 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-05-02  1:51 Eric B Munson [this message]
2008-05-02 17:11 ` Dave Hansen
2008-05-02 17:20   ` Dave Hansen
2008-05-02 21:52     ` Eric B Munson
2008-05-02 17:15 ` Dave Hansen
2008-05-02 21:44   ` Eric B Munson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1209693109.8483.23.camel@grover.beaverton.ibm.com \
    --to=ebmunson@us.ibm.com \
    --cc=andyw@linux.vnet.ibm.com \
    --cc=linux-mm@kvack.org \
    --cc=mel@csn.ul.ie \
    --cc=nacc@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox