linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [RFC][PATCH 1/2] Add shared and reserve control to hugetlb_file_setup
@ 2008-05-02  1:51 Eric B Munson
  2008-05-02 16:16 ` Dave Hansen
  0 siblings, 1 reply; 5+ messages in thread
From: Eric B Munson @ 2008-05-02  1:51 UTC (permalink / raw)
  To: linux-mm; +Cc: nacc, mel, andyw

[-- Attachment #1: Type: text/plain, Size: 6734 bytes --]

In order to back stacks with huge pages, we will want to make hugetlbfs
files to back them; these will be used to back private mappings.
Currently hugetlb_file_setup creates files to back shared memory segments.
Modify this to create both private and shared files, and update callers
to the new signatures.

By not reserving requested huge pages for stack areas, we allow many programs to
have VMAs which total more huge pages than are available on the system
without affecting each other until they attempt to use all the pages.  This
will be the case with the proposed huge page backed stack patch in this
series.

Based on 2.6.25

Signed-off-by: Eric Munson <ebmunson@us.ibm.com>

---

 fs/hugetlbfs/inode.c    |   39 +++++++++++++++++++++++++--------------
 include/linux/hugetlb.h |   16 ++++++++++++++--
 ipc/shm.c               |    3 ++-
 3 files changed, 41 insertions(+), 17 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 6846785..8c0ba46 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -488,7 +488,8 @@ out:
 }
 
 static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, 
-					gid_t gid, int mode, dev_t dev)
+					gid_t gid, int mode, dev_t dev,
+					unsigned long creat_flags)
 {
 	struct inode *inode;
 
@@ -504,7 +505,9 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		INIT_LIST_HEAD(&inode->i_mapping->private_list);
 		info = HUGETLBFS_I(inode);
-		mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, NULL);
+		if (creat_flags & HUGETLB_SHARED)
+			mpol_shared_policy_init(&info->policy, MPOL_DEFAULT,
+						NULL);
 		switch (mode & S_IFMT) {
 		default:
 			init_special_inode(inode, mode, dev);
@@ -545,7 +548,8 @@ static int hugetlbfs_mknod(struct inode *dir,
 	} else {
 		gid = current->fsgid;
 	}
-	inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid, gid, mode, dev);
+	inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid, gid, mode, dev,
+					HUGETLB_SHARED | HUGETLB_RESERVE);
 	if (inode) {
 		dir->i_ctime = dir->i_mtime = CURRENT_TIME;
 		d_instantiate(dentry, inode);
@@ -581,7 +585,8 @@ static int hugetlbfs_symlink(struct inode *dir,
 		gid = current->fsgid;
 
 	inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid,
-					gid, S_IFLNK|S_IRWXUGO, 0);
+					gid, S_IFLNK|S_IRWXUGO, 0,
+					HUGETLB_SHARED | HUGETLB_RESERVE);
 	if (inode) {
 		int l = strlen(symname)+1;
 		error = page_symlink(inode, symname, l);
@@ -845,7 +850,8 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_op = &hugetlbfs_ops;
 	sb->s_time_gran = 1;
 	inode = hugetlbfs_get_inode(sb, config.uid, config.gid,
-					S_IFDIR | config.mode, 0);
+					S_IFDIR | config.mode, 0,
+					HUGETLB_SHARED | HUGETLB_RESERVE);
 	if (!inode)
 		goto out_free;
 
@@ -910,7 +916,8 @@ static int can_do_hugetlb_shm(void)
 			can_do_mlock());
 }
 
-struct file *hugetlb_file_setup(const char *name, size_t size)
+struct file *hugetlb_file_setup(const char *name, size_t size,
+				unsigned long creat_flags)
 {
 	int error = -ENOMEM;
 	struct file *file;
@@ -921,11 +928,13 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
 	if (!hugetlbfs_vfsmount)
 		return ERR_PTR(-ENOENT);
 
-	if (!can_do_hugetlb_shm())
-		return ERR_PTR(-EPERM);
+	if (creat_flags & HUGETLB_SHARED) {
+		if (!can_do_hugetlb_shm())
+			return ERR_PTR(-EPERM);
 
-	if (!user_shm_lock(size, current->user))
-		return ERR_PTR(-ENOMEM);
+		if (!user_shm_lock(size, current->user))
+			return ERR_PTR(-ENOMEM);
+	}
 
 	root = hugetlbfs_vfsmount->mnt_root;
 	quick_string.name = name;
@@ -936,13 +945,14 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
 		goto out_shm_unlock;
 
 	error = -ENOSPC;
-	inode = hugetlbfs_get_inode(root->d_sb, current->fsuid,
-				current->fsgid, S_IFREG | S_IRWXUGO, 0);
+	inode = hugetlbfs_get_inode(root->d_sb, current->fsuid, current->fsgid,
+				    S_IFREG | S_IRWXUGO, 0, creat_flags);
 	if (!inode)
 		goto out_dentry;
 
 	error = -ENOMEM;
-	if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT))
+	if ((creat_flags & HUGETLB_RESERVE) &&
+		hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT))
 		goto out_inode;
 
 	d_instantiate(dentry, inode);
@@ -963,7 +973,8 @@ out_inode:
 out_dentry:
 	dput(dentry);
 out_shm_unlock:
-	user_shm_unlock(size, current->user);
+	if (creat_flags & HUGETLB_SHARED)
+		user_shm_unlock(size, current->user);
 	return ERR_PTR(error);
 }
 
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index addca4c..66b7a2b 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -165,12 +165,24 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
 
 extern const struct file_operations hugetlbfs_file_operations;
 extern struct vm_operations_struct hugetlb_vm_ops;
-struct file *hugetlb_file_setup(const char *name, size_t);
+struct file *hugetlb_file_setup(const char *name, size_t,
+				unsigned long creat_flags);
 int hugetlb_get_quota(struct address_space *mapping, long delta);
 void hugetlb_put_quota(struct address_space *mapping, long delta);
 
 #define BLOCKS_PER_HUGEPAGE	(HPAGE_SIZE / 512)
 
+#define HUGETLB_SHARED  0x00000001UL	/* Make the huge pages backed by the
+					 * file being created shared */
+
+#define HUGETLB_RESERVE 0x00000002UL	/* Reserve the huge pages backed by the
+					 * new file */
+
+#define HUGETLB_STACK_FILE "hugetlb-stack"
+
+/* to align the pointer to the (next) huge page boundary */
+#define HPAGE_ALIGN(addr)	(((addr)+HPAGE_SIZE-1)&HPAGE_MASK)
+
 static inline int is_file_hugepages(struct file *file)
 {
 	if (file->f_op == &hugetlbfs_file_operations)
@@ -189,7 +201,7 @@ static inline void set_file_hugepages(struct file *file)
 
 #define is_file_hugepages(file)		0
 #define set_file_hugepages(file)	BUG()
-#define hugetlb_file_setup(name,size)	ERR_PTR(-ENOSYS)
+#define hugetlb_file_setup(name,size,creat_flags)	ERR_PTR(-ENOSYS)
 
 #endif /* !CONFIG_HUGETLBFS */
 
diff --git a/ipc/shm.c b/ipc/shm.c
index cc63fae..38941eb 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -401,7 +401,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	sprintf (name, "SYSV%08x", key);
 	if (shmflg & SHM_HUGETLB) {
 		/* hugetlb_file_setup takes care of mlock user accounting */
-		file = hugetlb_file_setup(name, size);
+		file = hugetlb_file_setup(name, size,
+					  HUGETLB_SHARED | HUGETLB_RESERVE);
 		shp->mlock_user = current->user;
 	} else {
 		int acctflag = VM_ACCOUNT;


[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC][PATCH 1/2] Add shared and reserve control to hugetlb_file_setup
  2008-05-02  1:51 [RFC][PATCH 1/2] Add shared and reserve control to hugetlb_file_setup Eric B Munson
@ 2008-05-02 16:16 ` Dave Hansen
  2008-05-02 21:55   ` Eric B Munson
  2008-05-05 10:58   ` Mel Gorman
  0 siblings, 2 replies; 5+ messages in thread
From: Dave Hansen @ 2008-05-02 16:16 UTC (permalink / raw)
  To: ebmunson; +Cc: linux-mm, nacc, mel, andyw

On Thu, 2008-05-01 at 18:51 -0700, Eric B Munson wrote:
> In order to back stacks with huge pages, we will want to make hugetlbfs
> files to back them; these will be used to back private mappings.
> Currently hugetlb_file_setup creates files to back shared memory segments.
> Modify this to create both private and shared files,

Hugetlbfs can currently have private mappings, right?  Why not just use
the existing ones instead of creating a new variety with
hugetlb_file_setup()?

-- Dave

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC][PATCH 1/2] Add shared and reserve control to hugetlb_file_setup
  2008-05-02 16:16 ` Dave Hansen
@ 2008-05-02 21:55   ` Eric B Munson
  2008-05-05 10:58   ` Mel Gorman
  1 sibling, 0 replies; 5+ messages in thread
From: Eric B Munson @ 2008-05-02 21:55 UTC (permalink / raw)
  To: Dave Hansen; +Cc: linux-mm, nacc, mel, andyw

[-- Attachment #1: Type: text/plain, Size: 778 bytes --]

On Fri, 2008-05-02 at 09:16 -0700, Dave Hansen wrote:
> On Thu, 2008-05-01 at 18:51 -0700, Eric B Munson wrote:
> > In order to back stacks with huge pages, we will want to make hugetlbfs
> > files to back them; these will be used to back private mappings.
> > Currently hugetlb_file_setup creates files to back shared memory segments.
> > Modify this to create both private and shared files,
> 
> Hugetlbfs can currently have private mappings, right?  Why not just use
> the existing ones instead of creating a new variety with
> hugetlb_file_setup()?
> 
> -- Dave
> 

Currently the only way to create a private mapping of a huge page is to
have the file system mounted.  This change allows a huge page private
mapping without mounting the filesystem.

Eric

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC][PATCH 1/2] Add shared and reserve control to hugetlb_file_setup
  2008-05-02 16:16 ` Dave Hansen
  2008-05-02 21:55   ` Eric B Munson
@ 2008-05-05 10:58   ` Mel Gorman
  2008-05-06 17:23     ` Dave Hansen
  1 sibling, 1 reply; 5+ messages in thread
From: Mel Gorman @ 2008-05-05 10:58 UTC (permalink / raw)
  To: Dave Hansen; +Cc: ebmunson, linux-mm, nacc, andyw

On (02/05/08 09:16), Dave Hansen didst pronounce:
> On Thu, 2008-05-01 at 18:51 -0700, Eric B Munson wrote:
> > In order to back stacks with huge pages, we will want to make hugetlbfs
> > files to back them; these will be used to back private mappings.
> > Currently hugetlb_file_setup creates files to back shared memory segments.
> > Modify this to create both private and shared files,
> 
> Hugetlbfs can currently have private mappings, right?  Why not just use
> the existing ones instead of creating a new variety with
> hugetlb_file_setup()?
> 

hugetlb_file_setup() uses an internal mount to create files just for
SHM. However, it does the work necessary for MAP_SHARED mappings,
particularly reserve pages. The accounting is currently all fouled up to
deal with a private mapping that has reserves. Teaching
hugetlb_file_setup() to deal with private and shared mappings does
appear the most straight-forward route.

-- 
Mel Gorman
Part-time Phd Student                          Linux Technology Center
University of Limerick                         IBM Dublin Software Lab

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC][PATCH 1/2] Add shared and reserve control to hugetlb_file_setup
  2008-05-05 10:58   ` Mel Gorman
@ 2008-05-06 17:23     ` Dave Hansen
  0 siblings, 0 replies; 5+ messages in thread
From: Dave Hansen @ 2008-05-06 17:23 UTC (permalink / raw)
  To: Mel Gorman; +Cc: ebmunson, linux-mm, nacc, andyw

On Mon, 2008-05-05 at 11:58 +0100, Mel Gorman wrote:
> On (02/05/08 09:16), Dave Hansen didst pronounce:
> > On Thu, 2008-05-01 at 18:51 -0700, Eric B Munson wrote:
> > > In order to back stacks with huge pages, we will want to make hugetlbfs
> > > files to back them; these will be used to back private mappings.
> > > Currently hugetlb_file_setup creates files to back shared memory segments.
> > > Modify this to create both private and shared files,
> > 
> > Hugetlbfs can currently have private mappings, right?  Why not just use
> > the existing ones instead of creating a new variety with
> > hugetlb_file_setup()?
> > 
> 
> hugetlb_file_setup() uses an internal mount to create files just for
> SHM. However, it does the work necessary for MAP_SHARED mappings,
> particularly reserve pages. The accounting is currently all fouled up to
> deal with a private mapping that has reserves. Teaching
> hugetlb_file_setup() to deal with private and shared mappings does
> appear the most straight-forward route.

I agree that this is the most straightforward route, especially for a
proof of concept like this.  However, I worry that it is not a good
route for merging since it doesn't really put us on the road to a more
comprehensive solution.  How easy is it to extend this code for stack
growth or randomization, for instance?  Can we make this solve any other
problems?  Is there any way (or reason) to do generic file-backed
stacks?

Does anybody know of any important cases of applications changing their
rlimits after exec()?

In any case, it looks like I'm the only one objecting to it, so let's
try to get it a better changelog.  How about this for a summary?

There are two kinds of "Shared" hugetlbfs mappings:
1. using the internal vfsmount, via ipc/shm.c and shmctl()
2. mmap() of /hugetlbfs/file with MAP_SHARED

There is one kind of private mapping: mmap() of /hugetlbfs/file with
MAP_PRIVATE

(Eric could you fill in what the current reservation and prefaulting
rules are and what you expect from the new code?)

This patch adds a second class of "private" hugetlb-backed mapping.  But
we do it by sharing code with the ipc shm.  This is mostly because we
need to do our stack setup at execve() time and can't go opening files
from hugetlbfs.  The kernel-internal vfsmount for shm lets us get around
this.  We truly want anonymous memory, but MAP_PRIVATE is close enough
for now.

The hugetlb stack VMA is set up at execve() time and is fixed in size.
We derive the size from looking at the stack ulimit.  The stack pages
are faulted in as needed, but the stack VMA stays fixed in size.

-- Dave

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2008-05-06 17:23 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-05-02  1:51 [RFC][PATCH 1/2] Add shared and reserve control to hugetlb_file_setup Eric B Munson
2008-05-02 16:16 ` Dave Hansen
2008-05-02 21:55   ` Eric B Munson
2008-05-05 10:58   ` Mel Gorman
2008-05-06 17:23     ` Dave Hansen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox