From: npiggin@suse.de
To: akpm@linux-foundation.org
Cc: linux-mm@kvack.org, andi@firstfloor.org,
kniht@linux.vnet.ibm.com, nacc@us.ibm.com, abh@cray.com,
wli@holomorphy.com
Subject: [patch 07/18] hugetlbfs: per mount hstates
Date: Wed, 23 Apr 2008 11:53:09 +1000 [thread overview]
Message-ID: <20080423015430.378900000@nick.local0.net> (raw)
In-Reply-To: <20080423015302.745723000@nick.local0.net>
[-- Attachment #1: hugetlbfs-per-mount-hstate.patch --]
[-- Type: text/plain, Size: 7997 bytes --]
Add support to have individual hstates for each hugetlbfs mount
- Add a new pagesize= option to the hugetlbfs mount that allows setting
the page size
- Set up pointers to a suitable hstate for the set page size option
to the super block and the inode and the vma.
- Change the hstate accessors to use this information
- Add code to the hstate init function to set parsed_hstate for command
line processing
- Handle duplicated hstate registrations to the make command line user proof
[np: take hstate out of hugetlbfs inode and vma->vm_private_data]
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
---
fs/hugetlbfs/inode.c | 48 ++++++++++++++++++++++++++++++++++++++----------
include/linux/hugetlb.h | 14 +++++++++-----
mm/hugetlb.c | 16 +++-------------
mm/memory.c | 18 ++++++++++++++++--
4 files changed, 66 insertions(+), 30 deletions(-)
Index: linux-2.6/include/linux/hugetlb.h
===================================================================
--- linux-2.6.orig/include/linux/hugetlb.h
+++ linux-2.6/include/linux/hugetlb.h
@@ -136,6 +136,7 @@ struct hugetlbfs_config {
umode_t mode;
long nr_blocks;
long nr_inodes;
+ struct hstate *hstate;
};
struct hugetlbfs_sb_info {
@@ -144,6 +145,7 @@ struct hugetlbfs_sb_info {
long max_inodes; /* inodes allowed */
long free_inodes; /* inodes free */
spinlock_t stat_lock;
+ struct hstate *hstate;
};
@@ -226,19 +228,21 @@ extern struct hstate hstates[HUGE_MAX_HS
#define global_hstate (hstates[0])
-static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
+static inline struct hstate *hstate_inode(struct inode *i)
{
- return &global_hstate;
+ struct hugetlbfs_sb_info *hsb;
+ hsb = HUGETLBFS_SB(i->i_sb);
+ return hsb->hstate;
}
static inline struct hstate *hstate_file(struct file *f)
{
- return &global_hstate;
+ return hstate_inode(f->f_dentry->d_inode);
}
-static inline struct hstate *hstate_inode(struct inode *i)
+static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
{
- return &global_hstate;
+ return hstate_file(vma->vm_file);
}
static inline unsigned long huge_page_size(struct hstate *h)
Index: linux-2.6/fs/hugetlbfs/inode.c
===================================================================
--- linux-2.6.orig/fs/hugetlbfs/inode.c
+++ linux-2.6/fs/hugetlbfs/inode.c
@@ -53,6 +53,7 @@ int sysctl_hugetlb_shm_group;
enum {
Opt_size, Opt_nr_inodes,
Opt_mode, Opt_uid, Opt_gid,
+ Opt_pagesize,
Opt_err,
};
@@ -62,6 +63,7 @@ static match_table_t tokens = {
{Opt_mode, "mode=%o"},
{Opt_uid, "uid=%u"},
{Opt_gid, "gid=%u"},
+ {Opt_pagesize, "pagesize=%s"},
{Opt_err, NULL},
};
@@ -750,6 +752,8 @@ hugetlbfs_parse_options(char *options, s
char *p, *rest;
substring_t args[MAX_OPT_ARGS];
int option;
+ unsigned long long size = 0;
+ enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE;
if (!options)
return 0;
@@ -780,17 +784,13 @@ hugetlbfs_parse_options(char *options, s
break;
case Opt_size: {
- unsigned long long size;
/* memparse() will accept a K/M/G without a digit */
if (!isdigit(*args[0].from))
goto bad_val;
size = memparse(args[0].from, &rest);
- if (*rest == '%') {
- size <<= HPAGE_SHIFT;
- size *= max_huge_pages;
- do_div(size, 100);
- }
- pconfig->nr_blocks = (size >> HPAGE_SHIFT);
+ setsize = SIZE_STD;
+ if (*rest == '%')
+ setsize = SIZE_PERCENT;
break;
}
@@ -801,6 +801,19 @@ hugetlbfs_parse_options(char *options, s
pconfig->nr_inodes = memparse(args[0].from, &rest);
break;
+ case Opt_pagesize: {
+ unsigned long ps;
+ ps = memparse(args[0].from, &rest);
+ pconfig->hstate = size_to_hstate(ps);
+ if (!pconfig->hstate) {
+ printk(KERN_ERR
+ "hugetlbfs: Unsupported page size %lu MB\n",
+ ps >> 20);
+ return -EINVAL;
+ }
+ break;
+ }
+
default:
printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n",
p);
@@ -808,6 +821,18 @@ hugetlbfs_parse_options(char *options, s
break;
}
}
+
+ /* Do size after hstate is set up */
+ if (setsize > NO_SIZE) {
+ struct hstate *h = pconfig->hstate;
+ if (setsize == SIZE_PERCENT) {
+ size <<= huge_page_shift(h);
+ size *= h->max_huge_pages;
+ do_div(size, 100);
+ }
+ pconfig->nr_blocks = (size >> huge_page_shift(h));
+ }
+
return 0;
bad_val:
@@ -832,6 +857,7 @@ hugetlbfs_fill_super(struct super_block
config.uid = current->fsuid;
config.gid = current->fsgid;
config.mode = 0755;
+ config.hstate = size_to_hstate(HPAGE_SIZE);
ret = hugetlbfs_parse_options(data, &config);
if (ret)
return ret;
@@ -840,14 +866,15 @@ hugetlbfs_fill_super(struct super_block
if (!sbinfo)
return -ENOMEM;
sb->s_fs_info = sbinfo;
+ sbinfo->hstate = config.hstate;
spin_lock_init(&sbinfo->stat_lock);
sbinfo->max_blocks = config.nr_blocks;
sbinfo->free_blocks = config.nr_blocks;
sbinfo->max_inodes = config.nr_inodes;
sbinfo->free_inodes = config.nr_inodes;
sb->s_maxbytes = MAX_LFS_FILESIZE;
- sb->s_blocksize = HPAGE_SIZE;
- sb->s_blocksize_bits = HPAGE_SHIFT;
+ sb->s_blocksize = huge_page_size(config.hstate);
+ sb->s_blocksize_bits = huge_page_shift(config.hstate);
sb->s_magic = HUGETLBFS_MAGIC;
sb->s_op = &hugetlbfs_ops;
sb->s_time_gran = 1;
@@ -949,7 +976,8 @@ struct file *hugetlb_file_setup(const ch
goto out_dentry;
error = -ENOMEM;
- if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT))
+ if (hugetlb_reserve_pages(inode, 0,
+ size >> huge_page_shift(hstate_inode(inode))))
goto out_inode;
d_instantiate(dentry, inode);
Index: linux-2.6/mm/hugetlb.c
===================================================================
--- linux-2.6.orig/mm/hugetlb.c
+++ linux-2.6/mm/hugetlb.c
@@ -934,19 +934,9 @@ void __unmap_hugepage_range(struct vm_ar
void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
- /*
- * It is undesirable to test vma->vm_file as it should be non-null
- * for valid hugetlb area. However, vm_file will be NULL in the error
- * cleanup path of do_mmap_pgoff. When hugetlbfs ->mmap method fails,
- * do_mmap_pgoff() nullifies vma->vm_file before calling this function
- * to clean up. Since no pte has actually been setup, it is safe to
- * do nothing in this case.
- */
- if (vma->vm_file) {
- spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
- __unmap_hugepage_range(vma, start, end);
- spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
- }
+ spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
+ __unmap_hugepage_range(vma, start, end);
+ spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
}
static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c
+++ linux-2.6/mm/memory.c
@@ -846,9 +846,23 @@ unsigned long unmap_vmas(struct mmu_gath
}
if (unlikely(is_vm_hugetlb_page(vma))) {
- unmap_hugepage_range(vma, start, end);
- zap_work -= (end - start) /
+ /*
+ * It is undesirable to test vma->vm_file as it
+ * should be non-null for valid hugetlb area.
+ * However, vm_file will be NULL in the error
+ * cleanup path of do_mmap_pgoff. When
+ * hugetlbfs ->mmap method fails,
+ * do_mmap_pgoff() nullifies vma->vm_file
+ * before calling this function to clean up.
+ * Since no pte has actually been setup, it is
+ * safe to do nothing in this case.
+ */
+ if (vma->vm_file) {
+ unmap_hugepage_range(vma, start, end);
+ zap_work -= (end - start) /
(1 << huge_page_order(hstate_vma(vma)));
+ }
+
start = end;
} else
start = unmap_page_range(*tlbp, vma,
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2008-04-23 1:53 UTC|newest]
Thread overview: 123+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-04-23 1:53 [patch 00/18] multi size, and giant hugetlb page support, 1GB hugetlb for x86 npiggin
2008-04-23 1:53 ` [patch 01/18] hugetlb: fix lockdep spew npiggin
2008-04-23 13:06 ` KOSAKI Motohiro
2008-04-23 1:53 ` [patch 02/18] hugetlb: factor out huge_new_page npiggin
2008-04-24 23:49 ` Nishanth Aravamudan
2008-04-24 23:54 ` Nishanth Aravamudan
2008-04-24 23:58 ` Nishanth Aravamudan
2008-04-25 7:10 ` Andi Kleen
2008-04-25 16:54 ` Nishanth Aravamudan
2008-04-25 19:13 ` Christoph Lameter
2008-04-25 19:29 ` Nishanth Aravamudan
2008-04-30 19:16 ` Christoph Lameter
2008-04-30 20:44 ` Nishanth Aravamudan
2008-05-01 19:23 ` Christoph Lameter
2008-05-01 20:25 ` Nishanth Aravamudan
2008-05-01 20:34 ` Christoph Lameter
2008-05-01 21:01 ` Nishanth Aravamudan
2008-05-23 5:03 ` Nick Piggin
2008-04-23 1:53 ` [patch 03/18] mm: offset align in alloc_bootmem npiggin, Yinghai Lu
2008-04-23 1:53 ` [patch 04/18] hugetlb: modular state npiggin
2008-04-23 15:21 ` Jon Tollefson
2008-04-23 15:38 ` Nick Piggin
2008-04-25 17:13 ` Nishanth Aravamudan
2008-05-23 5:02 ` Nick Piggin
2008-05-23 20:48 ` Nishanth Aravamudan
2008-04-23 1:53 ` [patch 05/18] hugetlb: multiple hstates npiggin
2008-04-25 17:38 ` Nishanth Aravamudan
2008-04-25 17:48 ` Nishanth Aravamudan
2008-04-25 17:55 ` Andi Kleen
2008-04-25 17:52 ` Nishanth Aravamudan
2008-04-25 18:10 ` Andi Kleen
2008-04-28 10:13 ` Andy Whitcroft
2008-05-23 5:18 ` Nick Piggin
2008-04-29 17:27 ` Nishanth Aravamudan
2008-05-23 5:19 ` Nick Piggin
2008-04-23 1:53 ` [patch 06/18] hugetlb: multi hstate proc files npiggin
2008-05-02 19:53 ` Nishanth Aravamudan
2008-05-23 5:22 ` Nick Piggin
2008-05-23 20:30 ` Nishanth Aravamudan
2008-04-23 1:53 ` npiggin [this message]
2008-04-25 18:09 ` [patch 07/18] hugetlbfs: per mount hstates Nishanth Aravamudan
2008-04-25 20:36 ` Nishanth Aravamudan
2008-04-25 22:39 ` Nishanth Aravamudan
2008-04-28 18:20 ` Adam Litke
2008-04-28 18:46 ` Nishanth Aravamudan
2008-05-23 5:24 ` Nick Piggin
2008-05-23 20:34 ` Nishanth Aravamudan
2008-05-23 22:49 ` Nick Piggin
2008-05-23 23:24 ` Nishanth Aravamudan
2008-04-23 1:53 ` [patch 08/18] hugetlb: multi hstate sysctls npiggin
2008-04-25 18:14 ` Nishanth Aravamudan
2008-05-23 5:25 ` Nick Piggin
2008-05-23 20:27 ` Nishanth Aravamudan
2008-04-25 23:35 ` Nishanth Aravamudan
2008-05-23 5:28 ` Nick Piggin
2008-05-23 10:40 ` Andi Kleen
2008-04-23 1:53 ` [patch 09/18] hugetlb: abstract numa round robin selection npiggin
2008-04-23 1:53 ` [patch 10/18] mm: introduce non panic alloc_bootmem npiggin
2008-04-23 1:53 ` [patch 11/18] mm: export prep_compound_page to mm npiggin
2008-04-23 16:12 ` Andrew Hastings
2008-05-23 5:29 ` Nick Piggin
2008-04-23 1:53 ` [patch 12/18] hugetlbfs: support larger than MAX_ORDER npiggin
2008-04-23 16:15 ` Andrew Hastings
2008-04-23 16:25 ` Andi Kleen
2008-04-25 18:55 ` Nishanth Aravamudan
2008-05-23 5:29 ` Nick Piggin
2008-04-30 21:01 ` Dave Hansen
2008-05-23 5:30 ` Nick Piggin
2008-04-23 1:53 ` [patch 13/18] hugetlb: support boot allocate different sizes npiggin
2008-04-23 16:15 ` Andrew Hastings
2008-04-25 18:40 ` Nishanth Aravamudan
2008-04-25 18:50 ` Andi Kleen
2008-04-25 20:05 ` Nishanth Aravamudan
2008-05-23 5:36 ` Nick Piggin
2008-05-23 6:04 ` Nick Piggin
2008-05-23 20:32 ` Nishanth Aravamudan
2008-05-23 22:45 ` Nick Piggin
2008-05-23 22:53 ` Nishanth Aravamudan
2008-04-23 1:53 ` [patch 14/18] hugetlb: printk cleanup npiggin
2008-04-27 3:32 ` Nishanth Aravamudan
2008-05-23 5:37 ` Nick Piggin
2008-04-23 1:53 ` [patch 15/18] hugetlb: introduce huge_pud npiggin
2008-04-23 1:53 ` [patch 16/18] x86: support GB hugepages on 64-bit npiggin
2008-04-23 1:53 ` [patch 17/18] x86: add hugepagesz option " npiggin
2008-04-30 19:34 ` Nishanth Aravamudan
2008-04-30 19:52 ` Andi Kleen
2008-04-30 20:02 ` Nishanth Aravamudan
2008-04-30 20:19 ` Andi Kleen
2008-04-30 20:23 ` Nishanth Aravamudan
2008-04-30 20:45 ` Andi Kleen
2008-04-30 20:51 ` Nishanth Aravamudan
2008-04-30 20:40 ` Jon Tollefson
2008-04-30 20:48 ` Nishanth Aravamudan
2008-05-23 5:41 ` Nick Piggin
2008-05-23 10:43 ` Andi Kleen
2008-05-23 12:34 ` Nick Piggin
2008-05-23 14:29 ` Andi Kleen
2008-05-23 20:43 ` Nishanth Aravamudan
2008-05-23 20:39 ` Nishanth Aravamudan
2008-05-23 22:52 ` Nick Piggin
2008-04-23 1:53 ` [patch 18/18] hugetlb: my fixes 2 npiggin
2008-04-23 10:48 ` Andi Kleen
2008-04-23 15:36 ` Nick Piggin
2008-04-23 18:49 ` Nishanth Aravamudan
2008-04-23 19:37 ` Andi Kleen
2008-04-23 21:11 ` Nishanth Aravamudan
2008-04-23 21:38 ` Nishanth Aravamudan
2008-04-23 22:06 ` Dave Hansen
2008-04-23 15:20 ` Jon Tollefson
2008-04-23 15:44 ` Nick Piggin
2008-04-23 8:05 ` [patch 00/18] multi size, and giant hugetlb page support, 1GB hugetlb for x86 Andi Kleen
2008-04-23 15:34 ` Nick Piggin
2008-04-23 15:46 ` Andi Kleen
2008-04-23 15:53 ` Nick Piggin
2008-04-23 16:02 ` Andi Kleen
2008-04-23 16:02 ` Nick Piggin
2008-04-23 18:54 ` Nishanth Aravamudan
2008-04-23 18:52 ` Nishanth Aravamudan
2008-04-24 2:08 ` Nick Piggin
2008-04-24 6:43 ` Nishanth Aravamudan
2008-04-24 7:06 ` Nick Piggin
2008-04-24 17:08 ` Nishanth Aravamudan
2008-04-23 18:43 ` Nishanth Aravamudan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080423015430.378900000@nick.local0.net \
--to=npiggin@suse.de \
--cc=abh@cray.com \
--cc=akpm@linux-foundation.org \
--cc=andi@firstfloor.org \
--cc=kniht@linux.vnet.ibm.com \
--cc=linux-mm@kvack.org \
--cc=nacc@us.ibm.com \
--cc=wli@holomorphy.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox