linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Andi Kleen <andi@firstfloor.org>
To: linux-kernel@vger.kernel.org, pj@sgi.com, linux-mm@kvack.org,
	nickpiggin@yahoo.com.au
Subject: [PATCH] [6/18] Add support to have individual hstates for each hugetlbfs mount
Date: Mon, 17 Mar 2008 02:58:19 +0100 (CET)	[thread overview]
Message-ID: <20080317015819.E7ECB1B41E0@basil.firstfloor.org> (raw)
In-Reply-To: <20080317258.659191058@firstfloor.org>

- Add a new pagesize= option to the hugetlbfs mount that allows setting
the page size
- Store a pointer to the hstate matching the selected page size in the
super block, the inode and the vma.
- Change the hstate accessors to use this information
- Add code to the hstate init function to set parsed_hstate for command
line processing
- Handle duplicated hstate registrations to make the command line user proof

Signed-off-by: Andi Kleen <ak@suse.de>

---
 fs/hugetlbfs/inode.c    |   50 ++++++++++++++++++++++++++++++++++++++----------
 include/linux/hugetlb.h |   12 ++++++++---
 mm/hugetlb.c            |   22 +++++++++++++++++----
 3 files changed, 67 insertions(+), 17 deletions(-)

Index: linux/include/linux/hugetlb.h
===================================================================
--- linux.orig/include/linux/hugetlb.h
+++ linux/include/linux/hugetlb.h
@@ -134,6 +134,7 @@ struct hugetlbfs_config {
 	umode_t mode;
 	long	nr_blocks;
 	long	nr_inodes;
+	struct hstate *hstate;
 };
 
 struct hugetlbfs_sb_info {
@@ -142,12 +143,14 @@ struct hugetlbfs_sb_info {
 	long	max_inodes;   /* inodes allowed */
 	long	free_inodes;  /* inodes free */
 	spinlock_t	stat_lock;
+	struct hstate *hstate;
 };
 
 
 struct hugetlbfs_inode_info {
 	struct shared_policy policy;
 	struct inode vfs_inode;
+	struct hstate *hstate;
 };
 
 static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
@@ -212,6 +215,7 @@ struct hstate {
 };
 
 void __init huge_add_hstate(unsigned order);
+struct hstate *huge_lookup_hstate(unsigned long pagesize);
 
 #ifndef HUGE_MAX_HSTATE
 #define HUGE_MAX_HSTATE 1
@@ -223,17 +227,19 @@ extern struct hstate hstates[HUGE_MAX_HS
 
 static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
 {
-	return &global_hstate;
+	return (struct hstate *)vma->vm_private_data;
 }
 
 static inline struct hstate *hstate_file(struct file *f)
 {
-	return &global_hstate;
+	struct dentry *d = f->f_dentry;
+	struct inode *i = d->d_inode;
+	return HUGETLBFS_I(i)->hstate;
 }
 
 static inline struct hstate *hstate_inode(struct inode *i)
 {
-	return &global_hstate;
+	return HUGETLBFS_I(i)->hstate;
 }
 
 static inline unsigned huge_page_size(struct hstate *h)
Index: linux/fs/hugetlbfs/inode.c
===================================================================
--- linux.orig/fs/hugetlbfs/inode.c
+++ linux/fs/hugetlbfs/inode.c
@@ -53,6 +53,7 @@ int sysctl_hugetlb_shm_group;
 enum {
 	Opt_size, Opt_nr_inodes,
 	Opt_mode, Opt_uid, Opt_gid,
+	Opt_pagesize,
 	Opt_err,
 };
 
@@ -62,6 +63,7 @@ static match_table_t tokens = {
 	{Opt_mode,	"mode=%o"},
 	{Opt_uid,	"uid=%u"},
 	{Opt_gid,	"gid=%u"},
+	{Opt_pagesize,	"pagesize=%s"},
 	{Opt_err,	NULL},
 };
 
@@ -92,6 +94,7 @@ static int hugetlbfs_file_mmap(struct fi
 	 */
 	vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
 	vma->vm_ops = &hugetlb_vm_ops;
+	vma->vm_private_data = h;
 
 	if (vma->vm_pgoff & ~(huge_page_mask(h) >> PAGE_SHIFT))
 		return -EINVAL;
@@ -530,6 +533,7 @@ static struct inode *hugetlbfs_get_inode
 			inode->i_op = &page_symlink_inode_operations;
 			break;
 		}
+		info->hstate = HUGETLBFS_SB(sb)->hstate;
 	}
 	return inode;
 }
@@ -750,6 +754,8 @@ hugetlbfs_parse_options(char *options, s
 	char *p, *rest;
 	substring_t args[MAX_OPT_ARGS];
 	int option;
+	unsigned long long size = 0;
+	enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE;
 
 	if (!options)
 		return 0;
@@ -780,17 +786,13 @@ hugetlbfs_parse_options(char *options, s
 			break;
 
 		case Opt_size: {
- 			unsigned long long size;
 			/* memparse() will accept a K/M/G without a digit */
 			if (!isdigit(*args[0].from))
 				goto bad_val;
 			size = memparse(args[0].from, &rest);
-			if (*rest == '%') {
-				size <<= HPAGE_SHIFT;
-				size *= max_huge_pages;
-				do_div(size, 100);
-			}
-			pconfig->nr_blocks = (size >> HPAGE_SHIFT);
+			setsize = SIZE_STD;
+			if (*rest == '%')
+				setsize = SIZE_PERCENT;
 			break;
 		}
 
@@ -801,6 +803,19 @@ hugetlbfs_parse_options(char *options, s
 			pconfig->nr_inodes = memparse(args[0].from, &rest);
 			break;
 
+		case Opt_pagesize: {
+			unsigned long ps;
+			ps = memparse(args[0].from, &rest);
+			pconfig->hstate = huge_lookup_hstate(ps);
+			if (!pconfig->hstate) {
+				printk(KERN_ERR
+				"hugetlbfs: Unsupported page size %lu MB\n",
+					ps >> 20);
+				return -EINVAL;
+			}
+			break;
+		}
+
 		default:
 			printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n",
 				 p);
@@ -808,6 +823,18 @@ hugetlbfs_parse_options(char *options, s
 			break;
 		}
 	}
+
+	/* Do size after hstate is set up */
+	if (setsize > NO_SIZE) {
+		struct hstate *h = pconfig->hstate;
+		if (setsize == SIZE_PERCENT) {
+			size <<= huge_page_shift(h);
+			size *= max_huge_pages[h - hstates];
+			do_div(size, 100);
+		}
+		pconfig->nr_blocks = (size >> huge_page_shift(h));
+	}
+
 	return 0;
 
 bad_val:
@@ -832,6 +859,7 @@ hugetlbfs_fill_super(struct super_block 
 	config.uid = current->fsuid;
 	config.gid = current->fsgid;
 	config.mode = 0755;
+	config.hstate = &global_hstate;
 	ret = hugetlbfs_parse_options(data, &config);
 	if (ret)
 		return ret;
@@ -840,14 +868,15 @@ hugetlbfs_fill_super(struct super_block 
 	if (!sbinfo)
 		return -ENOMEM;
 	sb->s_fs_info = sbinfo;
+	sbinfo->hstate = config.hstate;
 	spin_lock_init(&sbinfo->stat_lock);
 	sbinfo->max_blocks = config.nr_blocks;
 	sbinfo->free_blocks = config.nr_blocks;
 	sbinfo->max_inodes = config.nr_inodes;
 	sbinfo->free_inodes = config.nr_inodes;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
-	sb->s_blocksize = HPAGE_SIZE;
-	sb->s_blocksize_bits = HPAGE_SHIFT;
+	sb->s_blocksize = huge_page_size(config.hstate);
+	sb->s_blocksize_bits = huge_page_shift(config.hstate);
 	sb->s_magic = HUGETLBFS_MAGIC;
 	sb->s_op = &hugetlbfs_ops;
 	sb->s_time_gran = 1;
@@ -949,7 +978,8 @@ struct file *hugetlb_file_setup(const ch
 		goto out_dentry;
 
 	error = -ENOMEM;
-	if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT))
+	if (hugetlb_reserve_pages(inode, 0,
+			size >> huge_page_shift(hstate_inode(inode))))
 		goto out_inode;
 
 	d_instantiate(dentry, inode);
Index: linux/mm/hugetlb.c
===================================================================
--- linux.orig/mm/hugetlb.c
+++ linux/mm/hugetlb.c
@@ -143,7 +143,7 @@ static void update_and_free_page(struct 
 
 static void free_huge_page(struct page *page)
 {
-	struct hstate *h = &global_hstate;
+	struct hstate *h = huge_lookup_hstate(PAGE_SIZE << compound_order(page));
 	int nid = page_to_nid(page);
 	struct address_space *mapping;
 
@@ -519,7 +519,11 @@ module_init(hugetlb_init);
 /* Should be called on processing a hugepagesz=... option */
 void __init huge_add_hstate(unsigned order)
 {
-	struct hstate *h;
+	struct hstate *h = huge_lookup_hstate(PAGE_SIZE << order);
+	if (h) {
+		parsed_hstate = h;
+		return;
+	}
 	BUG_ON(max_hstate >= HUGE_MAX_HSTATE);
 	BUG_ON(order <= HPAGE_SHIFT - PAGE_SHIFT);
 	h = &hstates[max_hstate++];
@@ -538,6 +542,16 @@ static int __init hugetlb_setup(char *s)
 }
 __setup("hugepages=", hugetlb_setup);
 
+struct hstate *huge_lookup_hstate(unsigned long pagesize)
+{
+	struct hstate *h;
+	for_each_hstate (h) {
+		if (huge_page_size(h) == pagesize)
+			return h;
+	}
+	return NULL;
+}
+
 static unsigned int cpuset_mems_nr(unsigned int *array)
 {
 	int node;
@@ -1345,7 +1359,7 @@ out:
 int hugetlb_reserve_pages(struct inode *inode, long from, long to)
 {
 	long ret, chg;
-	struct hstate *h = &global_hstate;
+	struct hstate *h = hstate_inode(inode);
 
 	chg = region_chg(&inode->i_mapping->private_list, from, to);
 	if (chg < 0)
@@ -1364,7 +1378,7 @@ int hugetlb_reserve_pages(struct inode *
 
 void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
 {
-	struct hstate *h = &global_hstate;
+	struct hstate *h = hstate_inode(inode);
 	long chg = region_truncate(&inode->i_mapping->private_list, offset);
 
 	spin_lock(&inode->i_lock);

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2008-03-17  1:58 UTC|newest]

Thread overview: 76+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-03-17  1:58 [PATCH] [0/18] GB pages hugetlb support Andi Kleen
2008-03-17  1:58 ` [PATCH] [1/18] Convert hugeltlb.c over to pass global state around in a structure Andi Kleen
2008-03-17 20:15   ` Adam Litke
2008-03-18 12:05   ` Mel Gorman
2008-03-17  1:58 ` [PATCH] [2/18] Add basic support for more than one hstate in hugetlbfs Andi Kleen
2008-03-17 20:22   ` Adam Litke
2008-03-17 20:44     ` Andi Kleen
2008-03-18 12:23   ` Mel Gorman
2008-03-23 10:38   ` KOSAKI Motohiro
2008-03-23 11:28     ` Andi Kleen
2008-03-23 11:30       ` KOSAKI Motohiro
2008-03-17  1:58 ` [PATCH] [3/18] Convert /proc output code over to report multiple hstates Andi Kleen
2008-03-18 12:28   ` Mel Gorman
2008-03-17  1:58 ` [PATCH] [4/18] Add basic support for more than one hstate in hugetlbfs Andi Kleen
2008-03-17  8:09   ` Paul Jackson
2008-03-17  8:15     ` Andi Kleen
2008-03-17 20:28   ` Adam Litke
2008-03-18 14:11   ` Mel Gorman
2008-03-17  1:58 ` [PATCH] [5/18] Expand the hugetlbfs sysctls to handle arrays for all hstates Andi Kleen
2008-03-18 14:34   ` Mel Gorman
2008-03-18 16:49     ` Andi Kleen
2008-03-18 17:01       ` Mel Gorman
2008-03-17  1:58 ` Andi Kleen [this message]
2008-03-18 14:10   ` [PATCH] [6/18] Add support to have individual hstates for each hugetlbfs mount Adam Litke
2008-03-18 15:02   ` Mel Gorman
2008-03-17  1:58 ` [PATCH] [7/18] Abstract out the NUMA node round robin code into a separate function Andi Kleen
2008-03-18 15:42   ` Mel Gorman
2008-03-18 15:47     ` Andi Kleen
2008-03-18 16:04       ` Mel Gorman
2008-03-17  1:58 ` [PATCH] [8/18] Add a __alloc_bootmem_node_nopanic Andi Kleen
2008-03-18 15:54   ` Mel Gorman
2008-03-17  1:58 ` [PATCH] [9/18] Export prep_compound_page to the hugetlb allocator Andi Kleen
2008-03-17  1:58 ` [PATCH] [10/18] Factor out new huge page preparation code into separate function Andi Kleen
2008-03-17 20:31   ` Adam Litke
2008-03-18 16:02   ` Mel Gorman
2008-03-17  1:58 ` [PATCH] [11/18] Fix alignment bug in bootmem allocator Andi Kleen
2008-03-17  2:19   ` Yinghai Lu
2008-03-17  7:02     ` Andi Kleen
2008-03-17  7:17       ` Yinghai Lu
2008-03-17  7:31         ` Yinghai Lu
2008-03-17  7:41           ` Andi Kleen
2008-03-17  7:53             ` Yinghai Lu
2008-03-17  8:10               ` Yinghai Lu
2008-03-17  8:17                 ` Andi Kleen
2008-03-17  8:56               ` Andi Kleen
2008-03-17 18:52                 ` Yinghai Lu
2008-03-17 21:27                   ` Yinghai Lu
2008-03-18  2:06                     ` Yinghai Lu
2008-03-18 16:18   ` Mel Gorman
2008-03-17  1:58 ` [PATCH] [12/18] Add support to allocate hugetlb pages that are larger than MAX_ORDER Andi Kleen
2008-03-18 16:27   ` Mel Gorman
2008-04-09 16:05   ` Andrew Hastings
2008-04-09 17:56     ` Andi Kleen
2008-03-17  1:58 ` [PATCH] [13/18] Add support to allocate hugepages of different size with hugepages= Andi Kleen
2008-03-18 16:32   ` Mel Gorman
2008-03-18 16:45     ` Andi Kleen
2008-03-18 16:46       ` Mel Gorman
2008-03-17  1:58 ` [PATCH] [14/18] Clean up hugetlb boot time printk Andi Kleen
2008-03-18 16:37   ` Mel Gorman
2008-03-17  1:58 ` [PATCH] [15/18] Add support to x86-64 to allocate and lookup GB pages in hugetlb Andi Kleen
2008-03-17  1:58 ` [PATCH] [16/18] Add huge pud support to hugetlbfs Andi Kleen
2008-03-17  1:58 ` [PATCH] [17/18] Add huge pud support to mm/memory.c Andi Kleen
2008-03-17  1:58 ` [PATCH] [18/18] Implement hugepagesz= option for x86-64 Andi Kleen
2008-03-17  9:29   ` Paul Jackson
2008-03-17  9:59     ` Andi Kleen
2008-03-17 10:02       ` Paul Jackson
2008-03-17  3:11 ` [PATCH] [0/18] GB pages hugetlb support Paul Jackson
2008-03-17  7:00   ` Andi Kleen
2008-03-17  7:00     ` Paul Jackson
2008-03-17  7:29       ` Andi Kleen
2008-03-17  5:35 ` Paul Jackson
2008-03-17  6:58   ` Andi Kleen
2008-03-17  9:26 ` Paul Jackson
2008-03-17 15:05 ` Adam Litke
2008-03-17 15:33   ` Andi Kleen
2008-03-17 15:59     ` Adam Litke

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080317015819.E7ECB1B41E0@basil.firstfloor.org \
    --to=andi@firstfloor.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=nickpiggin@yahoo.com.au \
    --cc=pj@sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox