From: William Kucharski <william.kucharski@oracle.com>
To: ceph-devel@vger.kernel.org, linux-afs@lists.infradead.org,
linux-btrfs@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-mm@kvack.org, netdev@vger.kernel.org,
Chris Mason <clm@fb.com>, "David S. Miller" <davem@davemloft.net>,
David Sterba <dsterba@suse.com>,
Josef Bacik <josef@toxicpanda.com>
Cc: "Dave Hansen" <dave.hansen@linux.intel.com>,
"Song Liu" <songliubraving@fb.com>,
"Bob Kasten" <robert.a.kasten@intel.com>,
"Mike Kravetz" <mike.kravetz@oracle.com>,
"William Kucharski" <william.kucharski@oracle.com>,
"Chad Mynhier" <chad.mynhier@oracle.com>,
"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
"Johannes Weiner" <jweiner@fb.com>,
"Matthew Wilcox" <willy@infradead.org>,
"Dave Airlie" <airlied@redhat.com>,
"Vlastimil Babka" <vbabka@suse.cz>,
"Keith Busch" <keith.busch@intel.com>,
"Ralph Campbell" <rcampbell@nvidia.com>,
"Steve Capper" <steve.capper@arm.com>,
"Dave Chinner" <dchinner@redhat.com>,
"Sean Christopherson" <sean.j.christopherson@intel.com>,
"Hugh Dickins" <hughd@google.com>,
"Ilya Dryomov" <idryomov@gmail.com>,
"Alexander Duyck" <alexander.h.duyck@linux.intel.com>,
"Thomas Gleixner" <tglx@linutronix.de>,
"Jérôme Glisse" <jglisse@redhat.com>,
"Amir Goldstein" <amir73il@gmail.com>,
"Jason Gunthorpe" <jgg@ziepe.ca>,
"Michal Hocko" <mhocko@suse.com>, "Jann Horn" <jannh@google.com>,
"David Howells" <dhowells@redhat.com>,
"John Hubbard" <jhubbard@nvidia.com>,
"Souptick Joarder" <jrdr.linux@gmail.com>,
"john.hubbard@gmail.com" <john.hubbard@gmail.com>,
"Jan Kara" <jack@suse.cz>,
"Andrey Konovalov" <andreyknvl@google.com>,
"Arun KS" <arunks@codeaurora.org>,
"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>,
"Jeff Layton" <jlayton@kernel.org>,
"Yangtao Li" <tiny.windzz@gmail.com>,
"Andrew Morton" <akpm@linux-foundation.org>,
"Robin Murphy" <robin.murphy@arm.com>,
"Mike Rapoport" <rppt@linux.ibm.com>,
"David Rientjes" <rientjes@google.com>,
"Andrey Ryabinin" <aryabinin@virtuozzo.com>,
"Yafang Shao" <laoar.shao@gmail.com>,
"Huang Shijie" <sjhuang@iluvatar.ai>,
"Yang Shi" <yang.shi@linux.alibaba.com>,
"Miklos Szeredi" <mszeredi@redhat.com>,
"Pavel Tatashin" <pasha.tatashin@oracle.com>,
"Kirill Tkhai" <ktkhai@virtuozzo.com>,
"Sage Weil" <sage@redhat.com>, "Ira Weiny" <ira.weiny@intel.com>,
"Dan Williams" <dan.j.williams@intel.com>,
"Darrick J. Wong" <darrick.wong@oracle.com>,
"Gao Xiang" <hsiangkao@aol.com>,
"Bartlomiej Zolnierkiewicz" <b.zolnierkie@samsung.com>,
"Ross Zwisler" <zwisler@google.com>
Subject: [PATCH 1/2] mm: Allow the page cache to allocate large pages
Date: Sun, 28 Jul 2019 16:47:07 -0600 [thread overview]
Message-ID: <20190728224708.28192-2-william.kucharski@oracle.com> (raw)
In-Reply-To: <20190728224708.28192-1-william.kucharski@oracle.com>
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: William Kucharski <william.kucharski@oracle.com>
---
fs/afs/dir.c | 2 +-
fs/btrfs/compression.c | 2 +-
fs/cachefiles/rdwr.c | 4 ++--
fs/ceph/addr.c | 2 +-
fs/ceph/file.c | 2 +-
include/linux/pagemap.h | 13 +++++++++----
mm/filemap.c | 25 +++++++++++++------------
mm/readahead.c | 2 +-
net/ceph/pagelist.c | 4 ++--
net/ceph/pagevec.c | 2 +-
10 files changed, 32 insertions(+), 26 deletions(-)
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index e640d67274be..0a392214f71e 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -274,7 +274,7 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
afs_stat_v(dvnode, n_inval);
ret = -ENOMEM;
- req->pages[i] = __page_cache_alloc(gfp);
+ req->pages[i] = __page_cache_alloc(gfp, 0);
if (!req->pages[i])
goto error;
ret = add_to_page_cache_lru(req->pages[i],
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 60c47b417a4b..5280e7477b7e 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -466,7 +466,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
}
page = __page_cache_alloc(mapping_gfp_constraint(mapping,
- ~__GFP_FS));
+ ~__GFP_FS), 0);
if (!page)
break;
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 44a3ce1e4ce4..11d30212745f 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -259,7 +259,7 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
goto backing_page_already_present;
if (!newpage) {
- newpage = __page_cache_alloc(cachefiles_gfp);
+ newpage = __page_cache_alloc(cachefiles_gfp, 0);
if (!newpage)
goto nomem_monitor;
}
@@ -495,7 +495,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
goto backing_page_already_present;
if (!newpage) {
- newpage = __page_cache_alloc(cachefiles_gfp);
+ newpage = __page_cache_alloc(cachefiles_gfp, 0);
if (!newpage)
goto nomem;
}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index e078cc55b989..bcb41fbee533 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1707,7 +1707,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
if (len > PAGE_SIZE)
len = PAGE_SIZE;
} else {
- page = __page_cache_alloc(GFP_NOFS);
+ page = __page_cache_alloc(GFP_NOFS, 0);
if (!page) {
err = -ENOMEM;
goto out;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 685a03cc4b77..ae58d7c31aa4 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1305,7 +1305,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
struct page *page = NULL;
loff_t i_size;
if (retry_op == READ_INLINE) {
- page = __page_cache_alloc(GFP_KERNEL);
+ page = __page_cache_alloc(GFP_KERNEL, 0);
if (!page)
return -ENOMEM;
}
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index c7552459a15f..e9004e3cb6a3 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -208,17 +208,17 @@ static inline int page_cache_add_speculative(struct page *page, int count)
}
#ifdef CONFIG_NUMA
-extern struct page *__page_cache_alloc(gfp_t gfp);
+extern struct page *__page_cache_alloc(gfp_t gfp, unsigned int order);
#else
-static inline struct page *__page_cache_alloc(gfp_t gfp)
+static inline struct page *__page_cache_alloc(gfp_t gfp, unsigned int order)
{
- return alloc_pages(gfp, 0);
+ return alloc_pages(gfp, order);
}
#endif
static inline struct page *page_cache_alloc(struct address_space *x)
{
- return __page_cache_alloc(mapping_gfp_mask(x));
+ return __page_cache_alloc(mapping_gfp_mask(x), 0);
}
static inline gfp_t readahead_gfp_mask(struct address_space *x)
@@ -240,6 +240,11 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
#define FGP_NOFS 0x00000010
#define FGP_NOWAIT 0x00000020
#define FGP_FOR_MMAP 0x00000040
+/* If you add more flags, increment FGP_ORDER_SHIFT */
+#define FGP_ORDER_SHIFT 7
+#define FGP_PMD ((PMD_SHIFT - PAGE_SHIFT) << FGP_ORDER_SHIFT)
+#define FGP_PUD ((PUD_SHIFT - PAGE_SHIFT) << FGP_ORDER_SHIFT)
+#define fgp_get_order(fgp) ((fgp) >> FGP_ORDER_SHIFT)
struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
int fgp_flags, gfp_t cache_gfp_mask);
diff --git a/mm/filemap.c b/mm/filemap.c
index d0cf700bf201..eb4c87428099 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -954,7 +954,7 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
#ifdef CONFIG_NUMA
-struct page *__page_cache_alloc(gfp_t gfp)
+struct page *__page_cache_alloc(gfp_t gfp, unsigned int order)
{
int n;
struct page *page;
@@ -964,12 +964,12 @@ struct page *__page_cache_alloc(gfp_t gfp)
do {
cpuset_mems_cookie = read_mems_allowed_begin();
n = cpuset_mem_spread_node();
- page = __alloc_pages_node(n, gfp, 0);
+ page = __alloc_pages_node(n, gfp, order);
} while (!page && read_mems_allowed_retry(cpuset_mems_cookie));
return page;
}
- return alloc_pages(gfp, 0);
+ return alloc_pages(gfp, order);
}
EXPORT_SYMBOL(__page_cache_alloc);
#endif
@@ -1597,12 +1597,12 @@ EXPORT_SYMBOL(find_lock_entry);
* pagecache_get_page - find and get a page reference
* @mapping: the address_space to search
* @offset: the page index
- * @fgp_flags: PCG flags
+ * @fgp_flags: FGP flags
* @gfp_mask: gfp mask to use for the page cache data page allocation
*
* Looks up the page cache slot at @mapping & @offset.
*
- * PCG flags modify how the page is returned.
+ * FGP flags modify how the page is returned.
*
* @fgp_flags can be:
*
@@ -1615,6 +1615,7 @@ EXPORT_SYMBOL(find_lock_entry);
* - FGP_FOR_MMAP: Similar to FGP_CREAT, only we want to allow the caller to do
* its own locking dance if the page is already in cache, or unlock the page
* before returning if we had to add the page to pagecache.
+ * - FGP_PMD: If FGP_CREAT is specified, attempt to allocate a PMD-sized page.
*
* If FGP_LOCK or FGP_CREAT are specified then the function may sleep even
* if the GFP flags specified for FGP_CREAT are atomic.
@@ -1660,12 +1661,13 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
no_page:
if (!page && (fgp_flags & FGP_CREAT)) {
int err;
- if ((fgp_flags & FGP_WRITE) && mapping_cap_account_dirty(mapping))
+ if ((fgp_flags & FGP_WRITE) &&
+ mapping_cap_account_dirty(mapping))
gfp_mask |= __GFP_WRITE;
if (fgp_flags & FGP_NOFS)
gfp_mask &= ~__GFP_FS;
- page = __page_cache_alloc(gfp_mask);
+ page = __page_cache_alloc(gfp_mask, fgp_get_order(fgp_flags));
if (!page)
return NULL;
@@ -2802,15 +2804,14 @@ static struct page *wait_on_page_read(struct page *page)
static struct page *do_read_cache_page(struct address_space *mapping,
pgoff_t index,
int (*filler)(void *, struct page *),
- void *data,
- gfp_t gfp)
+ void *data, unsigned int order, gfp_t gfp)
{
struct page *page;
int err;
repeat:
page = find_get_page(mapping, index);
if (!page) {
- page = __page_cache_alloc(gfp);
+ page = __page_cache_alloc(gfp, order);
if (!page)
return ERR_PTR(-ENOMEM);
err = add_to_page_cache_lru(page, mapping, index, gfp);
@@ -2917,7 +2918,7 @@ struct page *read_cache_page(struct address_space *mapping,
int (*filler)(void *, struct page *),
void *data)
{
- return do_read_cache_page(mapping, index, filler, data,
+ return do_read_cache_page(mapping, index, filler, data, 0,
mapping_gfp_mask(mapping));
}
EXPORT_SYMBOL(read_cache_page);
@@ -2939,7 +2940,7 @@ struct page *read_cache_page_gfp(struct address_space *mapping,
pgoff_t index,
gfp_t gfp)
{
- return do_read_cache_page(mapping, index, NULL, NULL, gfp);
+ return do_read_cache_page(mapping, index, NULL, NULL, 0, gfp);
}
EXPORT_SYMBOL(read_cache_page_gfp);
diff --git a/mm/readahead.c b/mm/readahead.c
index 2fe72cd29b47..954760a612ea 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -193,7 +193,7 @@ unsigned int __do_page_cache_readahead(struct address_space *mapping,
continue;
}
- page = __page_cache_alloc(gfp_mask);
+ page = __page_cache_alloc(gfp_mask, 0);
if (!page)
break;
page->index = page_offset;
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index 65e34f78b05d..0c3face908dc 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c
@@ -56,7 +56,7 @@ static int ceph_pagelist_addpage(struct ceph_pagelist *pl)
struct page *page;
if (!pl->num_pages_free) {
- page = __page_cache_alloc(GFP_NOFS);
+ page = __page_cache_alloc(GFP_NOFS, 0);
} else {
page = list_first_entry(&pl->free_list, struct page, lru);
list_del(&page->lru);
@@ -107,7 +107,7 @@ int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space)
space = (space + PAGE_SIZE - 1) >> PAGE_SHIFT; /* conv to num pages */
while (space > pl->num_pages_free) {
- struct page *page = __page_cache_alloc(GFP_NOFS);
+ struct page *page = __page_cache_alloc(GFP_NOFS, 0);
if (!page)
return -ENOMEM;
list_add_tail(&page->lru, &pl->free_list);
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 64305e7056a1..1d07e639216d 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -45,7 +45,7 @@ struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
if (!pages)
return ERR_PTR(-ENOMEM);
for (i = 0; i < num_pages; i++) {
- pages[i] = __page_cache_alloc(flags);
+ pages[i] = __page_cache_alloc(flags, 0);
if (pages[i] == NULL) {
ceph_release_page_vector(pages, i);
return ERR_PTR(-ENOMEM);
--
2.21.0
next prev parent reply other threads:[~2019-07-28 22:49 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-07-28 22:47 [PATCH 0/2] mm,thp: Add filemap_huge_fault() for THP William Kucharski
2019-07-28 22:47 ` William Kucharski [this message]
2019-07-29 20:00 ` [PATCH 1/2] mm: Allow the page cache to allocate large pages kbuild test robot
2019-07-28 22:47 ` [PATCH 2/2] mm,thp: Add experimental config option RO_EXEC_FILEMAP_HUGE_FAULT_THP William Kucharski
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190728224708.28192-2-william.kucharski@oracle.com \
--to=william.kucharski@oracle.com \
--cc=airlied@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=alexander.h.duyck@linux.intel.com \
--cc=amir73il@gmail.com \
--cc=andreyknvl@google.com \
--cc=aneesh.kumar@linux.ibm.com \
--cc=arunks@codeaurora.org \
--cc=aryabinin@virtuozzo.com \
--cc=b.zolnierkie@samsung.com \
--cc=ceph-devel@vger.kernel.org \
--cc=chad.mynhier@oracle.com \
--cc=clm@fb.com \
--cc=dan.j.williams@intel.com \
--cc=darrick.wong@oracle.com \
--cc=dave.hansen@linux.intel.com \
--cc=davem@davemloft.net \
--cc=dchinner@redhat.com \
--cc=dhowells@redhat.com \
--cc=dsterba@suse.com \
--cc=hsiangkao@aol.com \
--cc=hughd@google.com \
--cc=idryomov@gmail.com \
--cc=ira.weiny@intel.com \
--cc=jack@suse.cz \
--cc=jannh@google.com \
--cc=jgg@ziepe.ca \
--cc=jglisse@redhat.com \
--cc=jhubbard@nvidia.com \
--cc=jlayton@kernel.org \
--cc=john.hubbard@gmail.com \
--cc=josef@toxicpanda.com \
--cc=jrdr.linux@gmail.com \
--cc=jweiner@fb.com \
--cc=keith.busch@intel.com \
--cc=kirill.shutemov@linux.intel.com \
--cc=ktkhai@virtuozzo.com \
--cc=laoar.shao@gmail.com \
--cc=linux-afs@lists.infradead.org \
--cc=linux-btrfs@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@suse.com \
--cc=mike.kravetz@oracle.com \
--cc=mszeredi@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=pasha.tatashin@oracle.com \
--cc=rcampbell@nvidia.com \
--cc=rientjes@google.com \
--cc=robert.a.kasten@intel.com \
--cc=robin.murphy@arm.com \
--cc=rppt@linux.ibm.com \
--cc=sage@redhat.com \
--cc=sean.j.christopherson@intel.com \
--cc=sjhuang@iluvatar.ai \
--cc=songliubraving@fb.com \
--cc=steve.capper@arm.com \
--cc=tglx@linutronix.de \
--cc=tiny.windzz@gmail.com \
--cc=vbabka@suse.cz \
--cc=willy@infradead.org \
--cc=yang.shi@linux.alibaba.com \
--cc=zwisler@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox