linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
To: linux-mm@kvack.org
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>,
	linux-kernel@vger.kernel.org
Subject: [PATCH 71/75] mm/readahead: Add large folio readahead
Date: Fri,  4 Feb 2022 19:58:48 +0000	[thread overview]
Message-ID: <20220204195852.1751729-72-willy@infradead.org> (raw)
In-Reply-To: <20220204195852.1751729-1-willy@infradead.org>

Allocate large folios in the readahead code when the filesystem supports
them and it seems worth doing.  The heuristic for choosing which folio
sizes will surely need some tuning, but this aggressive ramp-up has been
good for testing.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/readahead.c | 106 +++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 99 insertions(+), 7 deletions(-)

diff --git a/mm/readahead.c b/mm/readahead.c
index cf0dcf89eb69..5100eaf5b0ee 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -148,7 +148,7 @@ static void read_pages(struct readahead_control *rac, struct list_head *pages,
 
 	blk_finish_plug(&plug);
 
-	BUG_ON(!list_empty(pages));
+	BUG_ON(pages && !list_empty(pages));
 	BUG_ON(readahead_count(rac));
 
 out:
@@ -431,11 +431,103 @@ static int try_context_readahead(struct address_space *mapping,
 	return 1;
 }
 
+/*
+ * There are some parts of the kernel which assume that PMD entries
+ * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
+ * limit the maximum allocation order to PMD size.  I'm not aware of any
+ * assumptions about maximum order if THP are disabled, but 8 seems like
+ * a good order (that's 1MB if you're using 4kB pages).
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
+#else
+#define MAX_PAGECACHE_ORDER	8
+#endif
+
+static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
+		pgoff_t mark, unsigned int order, gfp_t gfp)
+{
+	int err;
+	struct folio *folio = filemap_alloc_folio(gfp, order);
+
+	if (!folio)
+		return -ENOMEM;
+	if (mark - index < (1UL << order))
+		folio_set_readahead(folio);
+	err = filemap_add_folio(ractl->mapping, folio, index, gfp);
+	if (err)
+		folio_put(folio);
+	else
+		ractl->_nr_pages += 1UL << order;
+	return err;
+}
+
+static void page_cache_ra_order(struct readahead_control *ractl,
+		struct file_ra_state *ra, unsigned int new_order)
+{
+	struct address_space *mapping = ractl->mapping;
+	pgoff_t index = readahead_index(ractl);
+	pgoff_t limit = (i_size_read(mapping->host) - 1) >> PAGE_SHIFT;
+	pgoff_t mark = index + ra->size - ra->async_size;
+	int err = 0;
+	gfp_t gfp = readahead_gfp_mask(mapping);
+
+	if (!mapping_large_folio_support(mapping) || ra->size < 4)
+		goto fallback;
+
+	limit = min(limit, index + ra->size - 1);
+
+	if (new_order < MAX_PAGECACHE_ORDER) {
+		new_order += 2;
+		if (new_order > MAX_PAGECACHE_ORDER)
+			new_order = MAX_PAGECACHE_ORDER;
+		while ((1 << new_order) > ra->size)
+			new_order--;
+	}
+
+	while (index <= limit) {
+		unsigned int order = new_order;
+
+		/* Align with smaller pages if needed */
+		if (index & ((1UL << order) - 1)) {
+			order = __ffs(index);
+			if (order == 1)
+				order = 0;
+		}
+		/* Don't allocate pages past EOF */
+		while (index + (1UL << order) - 1 > limit) {
+			if (--order == 1)
+				order = 0;
+		}
+		err = ra_alloc_folio(ractl, index, mark, order, gfp);
+		if (err)
+			break;
+		index += 1UL << order;
+	}
+
+	if (index > limit) {
+		ra->size += index - limit - 1;
+		ra->async_size += index - limit - 1;
+	}
+
+	read_pages(ractl, NULL, false);
+
+	/*
+	 * If there were already pages in the page cache, then we may have
+	 * left some gaps.  Let the regular readahead code take care of this
+	 * situation.
+	 */
+	if (!err)
+		return;
+fallback:
+	do_page_cache_ra(ractl, ra->size, ra->async_size);
+}
+
 /*
  * A minimal readahead algorithm for trivial sequential/random reads.
  */
 static void ondemand_readahead(struct readahead_control *ractl,
-		bool hit_readahead_marker, unsigned long req_size)
+		struct folio *folio, unsigned long req_size)
 {
 	struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host);
 	struct file_ra_state *ra = ractl->ra;
@@ -470,12 +562,12 @@ static void ondemand_readahead(struct readahead_control *ractl,
 	}
 
 	/*
-	 * Hit a marked page without valid readahead state.
+	 * Hit a marked folio without valid readahead state.
 	 * E.g. interleaved reads.
 	 * Query the pagecache for async_size, which normally equals to
 	 * readahead size. Ramp it up and use it as the new readahead size.
 	 */
-	if (hit_readahead_marker) {
+	if (folio) {
 		pgoff_t start;
 
 		rcu_read_lock();
@@ -548,7 +640,7 @@ static void ondemand_readahead(struct readahead_control *ractl,
 	}
 
 	ractl->_index = ra->start;
-	do_page_cache_ra(ractl, ra->size, ra->async_size);
+	page_cache_ra_order(ractl, ra, folio ? folio_order(folio) : 0);
 }
 
 void page_cache_sync_ra(struct readahead_control *ractl,
@@ -576,7 +668,7 @@ void page_cache_sync_ra(struct readahead_control *ractl,
 	}
 
 	/* do read-ahead */
-	ondemand_readahead(ractl, false, req_count);
+	ondemand_readahead(ractl, NULL, req_count);
 }
 EXPORT_SYMBOL_GPL(page_cache_sync_ra);
 
@@ -605,7 +697,7 @@ void page_cache_async_ra(struct readahead_control *ractl,
 		return;
 
 	/* do read-ahead */
-	ondemand_readahead(ractl, true, req_count);
+	ondemand_readahead(ractl, folio, req_count);
 }
 EXPORT_SYMBOL_GPL(page_cache_async_ra);
 
-- 
2.34.1



  parent reply	other threads:[~2022-02-04 20:00 UTC|newest]

Thread overview: 115+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-04 19:57 [PATCH 00/75] MM folio patches for 5.18 Matthew Wilcox (Oracle)
2022-02-04 19:57 ` [PATCH 01/75] mm/gup: Increment the page refcount before the pincount Matthew Wilcox (Oracle)
2022-02-04 21:13   ` John Hubbard
2022-02-04 21:28     ` Matthew Wilcox
2022-02-07  7:45   ` Christoph Hellwig
2022-02-04 19:57 ` [PATCH 02/75] mm/gup: Remove for_each_compound_range() Matthew Wilcox (Oracle)
2022-02-04 19:57 ` [PATCH 03/75] mm/gup: Remove for_each_compound_head() Matthew Wilcox (Oracle)
2022-02-04 19:57 ` [PATCH 04/75] mm/gup: Change the calling convention for compound_range_next() Matthew Wilcox (Oracle)
2022-02-07  7:45   ` Christoph Hellwig
2022-02-04 19:57 ` [PATCH 05/75] mm/gup: Optimise compound_range_next() Matthew Wilcox (Oracle)
2022-02-04 19:57 ` [PATCH 06/75] mm/gup: Change the calling convention for compound_next() Matthew Wilcox (Oracle)
2022-02-04 19:57 ` [PATCH 07/75] mm/gup: Fix some contiguous memmap assumptions Matthew Wilcox (Oracle)
2022-02-04 19:57 ` [PATCH 08/75] mm/gup: Remove an assumption of a contiguous memmap Matthew Wilcox (Oracle)
2022-02-04 19:57 ` [PATCH 09/75] mm/gup: Handle page split race more efficiently Matthew Wilcox (Oracle)
2022-02-04 19:57 ` [PATCH 10/75] mm/gup: Remove hpage_pincount_add() Matthew Wilcox (Oracle)
2022-02-04 21:29   ` John Hubbard
2022-02-07  7:46   ` Christoph Hellwig
2022-02-04 19:57 ` [PATCH 11/75] mm/gup: Remove hpage_pincount_sub() Matthew Wilcox (Oracle)
2022-02-04 19:57 ` [PATCH 12/75] mm: Make compound_pincount always available Matthew Wilcox (Oracle)
2022-02-04 19:57 ` [PATCH 13/75] mm: Add folio_pincount_ptr() Matthew Wilcox (Oracle)
2022-02-04 19:57 ` [PATCH 14/75] mm: Turn page_maybe_dma_pinned() into folio_maybe_dma_pinned() Matthew Wilcox (Oracle)
2022-02-04 19:57 ` [PATCH 15/75] mm/gup: Add try_get_folio() and try_grab_folio() Matthew Wilcox (Oracle)
2022-02-04 19:57 ` [PATCH 16/75] mm/gup: Convert try_grab_page() to use a folio Matthew Wilcox (Oracle)
2022-02-06  2:12   ` John Hubbard
2022-02-07  7:47   ` Christoph Hellwig
2022-02-04 19:57 ` [PATCH 17/75] mm: Remove page_cache_add_speculative() and page_cache_get_speculative() Matthew Wilcox (Oracle)
2022-02-04 19:57 ` [PATCH 18/75] mm/gup: Add gup_put_folio() Matthew Wilcox (Oracle)
2022-02-04 19:57 ` [PATCH 19/75] mm/hugetlb: Use try_grab_folio() instead of try_grab_compound_head() Matthew Wilcox (Oracle)
2022-02-04 19:57 ` [PATCH 20/75] mm/gup: Convert gup_pte_range() to use a folio Matthew Wilcox (Oracle)
2022-02-06 14:52   ` Mark Hemment
2022-02-11 20:20     ` Matthew Wilcox
2022-02-04 19:57 ` [PATCH 21/75] mm/gup: Convert gup_hugepte() " Matthew Wilcox (Oracle)
2022-02-04 19:57 ` [PATCH 22/75] mm/gup: Convert gup_huge_pmd() " Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 23/75] mm/gup: Convert gup_huge_pud() " Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 24/75] mm/gup: Convert gup_huge_pgd() " Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 25/75] mm/gup: Turn compound_next() into gup_folio_next() Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 26/75] mm/gup: Turn compound_range_next() into gup_folio_range_next() Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 27/75] mm: Turn isolate_lru_page() into folio_isolate_lru() Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 28/75] mm/gup: Convert check_and_migrate_movable_pages() to use a folio Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 29/75] mm/workingset: Convert workingset_eviction() to take " Matthew Wilcox (Oracle)
2022-02-07  7:49   ` Christoph Hellwig
2022-02-04 19:58 ` [PATCH 30/75] mm/memcg: Convert mem_cgroup_swapout() " Matthew Wilcox (Oracle)
2022-02-07  7:49   ` Christoph Hellwig
2022-02-04 19:58 ` [PATCH 31/75] mm: Add lru_to_folio() Matthew Wilcox (Oracle)
2022-02-07  7:50   ` Christoph Hellwig
2022-02-11 20:24     ` Matthew Wilcox
2022-02-04 19:58 ` [PATCH 32/75] mm: Turn putback_lru_page() into folio_putback_lru() Matthew Wilcox (Oracle)
2022-02-07  7:50   ` Christoph Hellwig
2022-02-04 19:58 ` [PATCH 33/75] mm/vmscan: Convert __remove_mapping() to take a folio Matthew Wilcox (Oracle)
2022-02-07  7:51   ` Christoph Hellwig
2022-02-04 19:58 ` [PATCH 34/75] mm/vmscan: Turn page_check_dirty_writeback() into folio_check_dirty_writeback() Matthew Wilcox (Oracle)
2022-02-07  7:51   ` Christoph Hellwig
2022-02-12  1:49     ` Matthew Wilcox
2022-02-04 19:58 ` [PATCH 35/75] mm: Turn head_compound_mapcount() into folio_entire_mapcount() Matthew Wilcox (Oracle)
2022-02-07  7:52   ` Christoph Hellwig
2022-02-04 19:58 ` [PATCH 36/75] mm: Add folio_mapcount() Matthew Wilcox (Oracle)
2022-02-07  7:53   ` Christoph Hellwig
2022-02-04 19:58 ` [PATCH 37/75] mm: Add split_folio_to_list() Matthew Wilcox (Oracle)
2022-02-07  7:54   ` Christoph Hellwig
2022-02-04 19:58 ` [PATCH 38/75] mm: Add folio_is_zone_device() and folio_is_device_private() Matthew Wilcox (Oracle)
2022-02-07  7:54   ` Christoph Hellwig
2022-02-04 19:58 ` [PATCH 39/75] mm: Add folio_pgoff() Matthew Wilcox (Oracle)
2022-02-07  7:55   ` Christoph Hellwig
2022-02-04 19:58 ` [PATCH 40/75] mm: Add pvmw_set_page() and pvmw_set_folio() Matthew Wilcox (Oracle)
2022-02-07  7:55   ` Christoph Hellwig
2022-02-04 19:58 ` [PATCH 41/75] hexagon: Add pmd_pfn() Matthew Wilcox (Oracle)
2022-02-06 18:13   ` Mike Rapoport
2022-02-06 20:46     ` Matthew Wilcox
2022-02-06 21:33       ` Mike Rapoport
2022-02-06 22:05         ` Matthew Wilcox
2022-02-07 14:24           ` Mike Rapoport
2022-02-04 19:58 ` [PATCH 42/75] mm: Convert page_vma_mapped_walk to work on PFNs Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 43/75] mm/page_idle: Convert page_idle_clear_pte_refs() to use a folio Matthew Wilcox (Oracle)
2022-02-07  7:57   ` Christoph Hellwig
2022-02-04 19:58 ` [PATCH 44/75] mm/rmap: Use a folio in page_mkclean_one() Matthew Wilcox (Oracle)
2022-02-07  7:57   ` Christoph Hellwig
2022-02-04 19:58 ` [PATCH 45/75] mm/rmap: Turn page_referenced() into folio_referenced() Matthew Wilcox (Oracle)
2022-02-07  7:58   ` Christoph Hellwig
2022-02-04 19:58 ` [PATCH 46/75] mm/mlock: Turn clear_page_mlock() into folio_end_mlock() Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 47/75] mm/mlock: Turn mlock_vma_page() into mlock_vma_folio() Matthew Wilcox (Oracle)
2022-02-07 10:46   ` Mike Rapoport
2022-02-04 19:58 ` [PATCH 48/75] mm/rmap: Turn page_mlock() into folio_mlock() Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 49/75] mm/mlock: Turn munlock_vma_page() into munlock_vma_folio() Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 50/75] mm/huge_memory: Convert __split_huge_pmd() to take a folio Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 51/75] mm/rmap: Convert try_to_unmap() " Matthew Wilcox (Oracle)
2022-02-09 14:24   ` Mauricio Faria de Oliveira
2022-02-09 14:29     ` Matthew Wilcox
2022-02-04 19:58 ` [PATCH 52/75] mm/rmap: Convert try_to_migrate() to folios Matthew Wilcox (Oracle)
2022-02-09 15:27   ` Zi Yan
2022-02-04 19:58 ` [PATCH 53/75] mm/rmap: Convert make_device_exclusive_range() to use folios Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 54/75] mm/migrate: Convert remove_migration_ptes() to folios Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 55/75] mm/damon: Convert damon_pa_mkold() to use a folio Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 56/75] mm/damon: Convert damon_pa_young() " Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 57/75] mm/rmap: Turn page_lock_anon_vma_read() into folio_lock_anon_vma_read() Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 58/75] mm: Turn page_anon_vma() into folio_anon_vma() Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 59/75] mm/rmap: Convert rmap_walk() to take a folio Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 60/75] mm/rmap: Constify the rmap_walk_control argument Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 61/75] mm/vmscan: Free non-shmem folios without splitting them Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 62/75] mm/vmscan: Optimise shrink_page_list for non-PMD-sized folios Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 63/75] mm/vmscan: Account large folios correctly Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 64/75] mm/vmscan: Turn page_check_references() into folio_check_references() Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 65/75] mm/vmscan: Convert pageout() to take a folio Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 66/75] mm: Turn can_split_huge_page() into can_split_folio() Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 67/75] mm/filemap: Allow large folios to be added to the page cache Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 68/75] mm: Fix READ_ONLY_THP warning Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 69/75] mm: Make large folios depend on THP Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 70/75] mm: Support arbitrary THP sizes Matthew Wilcox (Oracle)
2022-02-04 19:58 ` Matthew Wilcox (Oracle) [this message]
2022-02-06 13:10   ` [PATCH 71/75] mm/readahead: Add large folio readahead Mark Hemment
2022-02-04 19:58 ` [PATCH 72/75] mm/readahead: Align file mappings for non-DAX Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 73/75] mm/readahead: Switch to page_cache_ra_order Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 74/75] mm/filemap: Support VM_HUGEPAGE for file mappings Matthew Wilcox (Oracle)
2022-02-04 19:58 ` [PATCH 75/75] selftests/vm/transhuge-stress: Support file-backed PMD folios Matthew Wilcox (Oracle)
2022-02-13 22:31 ` [PATCH 00/75] MM folio patches for 5.18 John Hubbard
2022-02-14  4:33 ` Matthew Wilcox

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220204195852.1751729-72-willy@infradead.org \
    --to=willy@infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.