linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: npiggin@suse.de
To: akpm@linux-foundation.org
Cc: linux-mm@kvack.org
Subject: [patch 04/21] hugetlbfs: per mount huge page sizes
Date: Wed, 04 Jun 2008 21:29:43 +1000	[thread overview]
Message-ID: <20080604113111.521975017@amd.local0.net> (raw)
In-Reply-To: <20080604112939.789444496@amd.local0.net>

[-- Attachment #1: hugetlbfs-per-mount-hstate.patch --]
[-- Type: text/plain, Size: 8108 bytes --]

Add the ability to configure the hugetlb hstate used on a per-mount basis.

- Add a new pagesize= option to the hugetlbfs mount that allows setting
  the page size
- This option causes the mount code to find the hstate corresponding to the
  specified size and set up a pointer to that hstate in the mount's
  superblock.
- Change the hstate accessors to use this information rather than the
  default_hstate they were using (requires a slight change in mm/memory.c
  so we don't NULL deref in the error-unmap path -- see comments).

[np: take hstate out of hugetlbfs inode and vma->vm_private_data]

Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
---
 fs/hugetlbfs/inode.c    |   48 ++++++++++++++++++++++++++++++++++++++----------
 include/linux/hugetlb.h |   14 +++++++++-----
 mm/hugetlb.c            |   16 +++-------------
 mm/memory.c             |   18 ++++++++++++++++--
 4 files changed, 66 insertions(+), 30 deletions(-)

Index: linux-2.6/include/linux/hugetlb.h
===================================================================
--- linux-2.6.orig/include/linux/hugetlb.h	2008-06-04 20:51:18.000000000 +1000
+++ linux-2.6/include/linux/hugetlb.h	2008-06-04 20:51:19.000000000 +1000
@@ -100,6 +100,7 @@ struct hugetlbfs_config {
 	umode_t mode;
 	long	nr_blocks;
 	long	nr_inodes;
+	struct hstate *hstate;
 };
 
 struct hugetlbfs_sb_info {
@@ -108,6 +109,7 @@ struct hugetlbfs_sb_info {
 	long	max_inodes;   /* inodes allowed */
 	long	free_inodes;  /* inodes free */
 	spinlock_t	stat_lock;
+	struct hstate *hstate;
 };
 
 
@@ -191,19 +193,21 @@ extern unsigned int default_hstate_idx;
 
 #define default_hstate (hstates[default_hstate_idx])
 
-static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
+static inline struct hstate *hstate_inode(struct inode *i)
 {
-	return &default_hstate;
+	struct hugetlbfs_sb_info *hsb;
+	hsb = HUGETLBFS_SB(i->i_sb);
+	return hsb->hstate;
 }
 
 static inline struct hstate *hstate_file(struct file *f)
 {
-	return &default_hstate;
+	return hstate_inode(f->f_dentry->d_inode);
 }
 
-static inline struct hstate *hstate_inode(struct inode *i)
+static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
 {
-	return &default_hstate;
+	return hstate_file(vma->vm_file);
 }
 
 static inline unsigned long huge_page_size(struct hstate *h)
Index: linux-2.6/fs/hugetlbfs/inode.c
===================================================================
--- linux-2.6.orig/fs/hugetlbfs/inode.c	2008-06-04 20:51:18.000000000 +1000
+++ linux-2.6/fs/hugetlbfs/inode.c	2008-06-04 20:51:19.000000000 +1000
@@ -53,6 +53,7 @@ int sysctl_hugetlb_shm_group;
 enum {
 	Opt_size, Opt_nr_inodes,
 	Opt_mode, Opt_uid, Opt_gid,
+	Opt_pagesize,
 	Opt_err,
 };
 
@@ -62,6 +63,7 @@ static match_table_t tokens = {
 	{Opt_mode,	"mode=%o"},
 	{Opt_uid,	"uid=%u"},
 	{Opt_gid,	"gid=%u"},
+	{Opt_pagesize,	"pagesize=%s"},
 	{Opt_err,	NULL},
 };
 
@@ -750,6 +752,8 @@ hugetlbfs_parse_options(char *options, s
 	char *p, *rest;
 	substring_t args[MAX_OPT_ARGS];
 	int option;
+	unsigned long long size = 0;
+	enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE;
 
 	if (!options)
 		return 0;
@@ -780,17 +784,13 @@ hugetlbfs_parse_options(char *options, s
 			break;
 
 		case Opt_size: {
- 			unsigned long long size;
 			/* memparse() will accept a K/M/G without a digit */
 			if (!isdigit(*args[0].from))
 				goto bad_val;
 			size = memparse(args[0].from, &rest);
-			if (*rest == '%') {
-				size <<= HPAGE_SHIFT;
-				size *= max_huge_pages;
-				do_div(size, 100);
-			}
-			pconfig->nr_blocks = (size >> HPAGE_SHIFT);
+			setsize = SIZE_STD;
+			if (*rest == '%')
+				setsize = SIZE_PERCENT;
 			break;
 		}
 
@@ -801,6 +801,19 @@ hugetlbfs_parse_options(char *options, s
 			pconfig->nr_inodes = memparse(args[0].from, &rest);
 			break;
 
+		case Opt_pagesize: {
+			unsigned long ps;
+			ps = memparse(args[0].from, &rest);
+			pconfig->hstate = size_to_hstate(ps);
+			if (!pconfig->hstate) {
+				printk(KERN_ERR
+				"hugetlbfs: Unsupported page size %lu MB\n",
+					ps >> 20);
+				return -EINVAL;
+			}
+			break;
+		}
+
 		default:
 			printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n",
 				 p);
@@ -808,6 +821,18 @@ hugetlbfs_parse_options(char *options, s
 			break;
 		}
 	}
+
+	/* Do size after hstate is set up */
+	if (setsize > NO_SIZE) {
+		struct hstate *h = pconfig->hstate;
+		if (setsize == SIZE_PERCENT) {
+			size <<= huge_page_shift(h);
+			size *= h->max_huge_pages;
+			do_div(size, 100);
+		}
+		pconfig->nr_blocks = (size >> huge_page_shift(h));
+	}
+
 	return 0;
 
 bad_val:
@@ -832,6 +857,7 @@ hugetlbfs_fill_super(struct super_block 
 	config.uid = current->fsuid;
 	config.gid = current->fsgid;
 	config.mode = 0755;
+	config.hstate = &default_hstate;
 	ret = hugetlbfs_parse_options(data, &config);
 	if (ret)
 		return ret;
@@ -840,14 +866,15 @@ hugetlbfs_fill_super(struct super_block 
 	if (!sbinfo)
 		return -ENOMEM;
 	sb->s_fs_info = sbinfo;
+	sbinfo->hstate = config.hstate;
 	spin_lock_init(&sbinfo->stat_lock);
 	sbinfo->max_blocks = config.nr_blocks;
 	sbinfo->free_blocks = config.nr_blocks;
 	sbinfo->max_inodes = config.nr_inodes;
 	sbinfo->free_inodes = config.nr_inodes;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
-	sb->s_blocksize = HPAGE_SIZE;
-	sb->s_blocksize_bits = HPAGE_SHIFT;
+	sb->s_blocksize = huge_page_size(config.hstate);
+	sb->s_blocksize_bits = huge_page_shift(config.hstate);
 	sb->s_magic = HUGETLBFS_MAGIC;
 	sb->s_op = &hugetlbfs_ops;
 	sb->s_time_gran = 1;
Index: linux-2.6/mm/hugetlb.c
===================================================================
--- linux-2.6.orig/mm/hugetlb.c	2008-06-04 20:51:18.000000000 +1000
+++ linux-2.6/mm/hugetlb.c	2008-06-04 20:51:19.000000000 +1000
@@ -1334,19 +1334,9 @@ void __unmap_hugepage_range(struct vm_ar
 void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 			  unsigned long end, struct page *ref_page)
 {
-	/*
-	 * It is undesirable to test vma->vm_file as it should be non-null
-	 * for valid hugetlb area. However, vm_file will be NULL in the error
-	 * cleanup path of do_mmap_pgoff. When hugetlbfs ->mmap method fails,
-	 * do_mmap_pgoff() nullifies vma->vm_file before calling this function
-	 * to clean up. Since no pte has actually been setup, it is safe to
-	 * do nothing in this case.
-	 */
-	if (vma->vm_file) {
-		spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
-		__unmap_hugepage_range(vma, start, end, ref_page);
-		spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
-	}
+	spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
+	__unmap_hugepage_range(vma, start, end, ref_page);
+	spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
 }
 
 /*
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c	2008-06-04 20:51:18.000000000 +1000
+++ linux-2.6/mm/memory.c	2008-06-04 20:51:19.000000000 +1000
@@ -902,9 +902,23 @@ unsigned long unmap_vmas(struct mmu_gath
 			}
 
 			if (unlikely(is_vm_hugetlb_page(vma))) {
-				unmap_hugepage_range(vma, start, end, NULL);
-				zap_work -= (end - start) /
+				/*
+				 * It is undesirable to test vma->vm_file as it
+				 * should be non-null for valid hugetlb area.
+				 * However, vm_file will be NULL in the error
+				 * cleanup path of do_mmap_pgoff. When
+				 * hugetlbfs ->mmap method fails,
+				 * do_mmap_pgoff() nullifies vma->vm_file
+				 * before calling this function to clean up.
+				 * Since no pte has actually been setup, it is
+				 * safe to do nothing in this case.
+				 */
+				if (vma->vm_file) {
+					unmap_hugepage_range(vma, start, end, NULL);
+					zap_work -= (end - start) /
 					pages_per_huge_page(hstate_vma(vma));
+				}
+
 				start = end;
 			} else
 				start = unmap_page_range(*tlbp, vma,

-- 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2008-06-04 11:29 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-06-04 11:29 [patch 00/21] hugetlb patches resend npiggin
2008-06-04 11:29 ` [patch 01/21] hugetlb: factor out prep_new_huge_page npiggin
2008-06-04 11:29 ` [patch 02/21] hugetlb: modular state for hugetlb page size npiggin
2008-06-04 11:29 ` [patch 03/21] hugetlb: multiple hstates for multiple page sizes npiggin
2008-06-04 11:29 ` npiggin [this message]
2008-06-04 11:29 ` [patch 05/21] hugetlb: new sysfs interface npiggin
2008-06-08 18:59   ` Andrew Morton
2008-06-10  3:02     ` Nick Piggin
2008-06-12  1:11       ` Nishanth Aravamudan
2008-07-02  0:24         ` Nishanth Aravamudan
2008-06-20 15:18   ` Dave Hansen
2008-06-23  2:48     ` Nick Piggin
2008-06-23  3:31       ` Andrew Morton
2008-06-23  3:52         ` Nick Piggin
2008-06-04 11:29 ` [patch 06/21] hugetlb: abstract numa round robin selection npiggin
2008-06-04 11:29 ` [patch 07/21] mm: introduce non panic alloc_bootmem npiggin
2008-06-04 11:29 ` [patch 08/21] mm: export prep_compound_page to mm npiggin
2008-06-04 11:29 ` [patch 09/21] hugetlb: support larger than MAX_ORDER npiggin
2008-06-04 11:29 ` [patch 10/21] hugetlb: support boot allocate different sizes npiggin
2008-06-04 11:29 ` [patch 11/21] hugetlb: printk cleanup npiggin
2008-06-04 11:29 ` [patch 12/21] hugetlb: introduce pud_huge npiggin
2008-06-11 23:16   ` Andrew Morton
2008-06-12  0:45     ` Nick Piggin
2008-06-04 11:29 ` [patch 13/21] x86: support GB hugepages on 64-bit npiggin
2008-06-04 11:29 ` [patch 14/21] x86: add hugepagesz option " npiggin
2008-06-04 17:51   ` Randy Dunlap
2008-06-05  2:01     ` Nick Piggin
2008-06-04 11:29 ` [patch 15/21] hugetlb: override default huge page size npiggin
2008-06-09 10:41   ` Andrew Morton
2008-06-10  3:22     ` Nick Piggin
2008-06-04 11:29 ` [patch 16/21] hugetlb: allow arch overried hugepage allocation npiggin
2008-06-08 19:14   ` Andrew Morton
2008-06-10  3:26     ` Nick Piggin
2008-06-12  8:08     ` Andy Whitcroft
2008-06-04 11:29 ` [patch 17/21] powerpc: function to allocate gigantic hugepages npiggin
2008-06-04 11:29 ` [patch 18/21] powerpc: scan device tree for gigantic pages npiggin
2008-06-04 11:29 ` [patch 19/21] powerpc: define support for 16G hugepages npiggin
2008-06-08 19:05   ` Andrew Morton
2008-06-10  3:05     ` Nick Piggin
2008-06-04 11:29 ` [patch 20/21] fs: check for statfs overflow npiggin
2008-06-08 19:06   ` Andrew Morton
2008-06-10  3:12     ` Nick Piggin
2008-06-04 11:30 ` [patch 21/21] powerpc: support multiple hugepage sizes npiggin
2008-07-14 16:32   ` [patch] powerpc: hugetlb pgtable cache access cleanup Jon Tollefson
2008-07-14 22:56     ` Andrew Morton
2008-07-15 22:49       ` [patch v2] " Jon Tollefson
2008-07-15 22:57         ` Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080604113111.521975017@amd.local0.net \
    --to=npiggin@suse.de \
    --cc=akpm@linux-foundation.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox