linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Kairui Song <ryncsn@gmail.com>
To: linux-mm@kvack.org
Cc: "Huang, Ying" <ying.huang@intel.com>,
	Chris Li <chrisl@kernel.org>, Minchan Kim <minchan@kernel.org>,
	Barry Song <v-songbaohua@oppo.com>,
	Ryan Roberts <ryan.roberts@arm.com>, Yu Zhao <yuzhao@google.com>,
	SeongJae Park <sj@kernel.org>,
	David Hildenbrand <david@redhat.com>,
	Yosry Ahmed <yosryahmed@google.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Matthew Wilcox <willy@infradead.org>,
	Nhat Pham <nphamcs@gmail.com>,
	Chengming Zhou <zhouchengming@bytedance.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-kernel@vger.kernel.org, Kairui Song <kasong@tencent.com>
Subject: [RFC PATCH 01/10] mm/filemap: split filemap storing logic into a standalone helper
Date: Wed, 27 Mar 2024 02:50:23 +0800	[thread overview]
Message-ID: <20240326185032.72159-2-ryncsn@gmail.com> (raw)
In-Reply-To: <20240326185032.72159-1-ryncsn@gmail.com>

From: Kairui Song <kasong@tencent.com>

The swap cache can reuse this part for multi-index support; there is no
performance change on the page cache side beyond noise:

Test in 8G memory cgroup and 16G brd ramdisk.

  echo 3 > /proc/sys/vm/drop_caches

  fio -name=cached --numjobs=16 --filename=/mnt/test.img \
    --buffered=1 --ioengine=mmap --rw=randread --time_based \
    --ramp_time=30s --runtime=5m --group_reporting

Before:
bw (  MiB/s): min=  493, max= 3947, per=100.00%, avg=2625.56, stdev=25.74, samples=8651
iops        : min=126454, max=1010681, avg=672142.61, stdev=6590.48, samples=8651

After:
bw (  MiB/s): min=  298, max= 3840, per=100.00%, avg=2614.34, stdev=23.77, samples=8689
iops        : min=76464, max=983045, avg=669270.35, stdev=6084.31, samples=8689

Test result with THP (do a THP randread, then switch to 4K pages, in the
hope that this triggers a lot of splitting):

  echo 3 > /proc/sys/vm/drop_caches

  fio -name=cached --numjobs=16 --filename=/mnt/test.img \
      --buffered=1 --ioengine=mmap -thp=1 --readonly \
      --rw=randread --time_based --ramp_time=30s --runtime=10m \
      --group_reporting

  fio -name=cached --numjobs=16 --filename=/mnt/test.img \
      --buffered=1 --ioengine=mmap \
      --rw=randread --time_based --runtime=5s --group_reporting

Before:
bw (  KiB/s): min= 4611, max=15370, per=100.00%, avg=8928.74, stdev=105.17, samples=19146
iops        : min= 1151, max= 3842, avg=2231.27, stdev=26.29, samples=19146

READ: bw=4635B/s (4635B/s), 4635B/s-4635B/s (4635B/s-4635B/s), io=64.0KiB (65.5kB), run=14137-14137msec

After:
bw (  KiB/s): min= 4691, max=15666, per=100.00%, avg=8890.30, stdev=104.53, samples=19056
iops        : min= 1167, max= 3913, avg=2218.68, stdev=26.15, samples=19056

READ: bw=4590B/s (4590B/s), 4590B/s-4590B/s (4590B/s-4590B/s), io=64.0KiB (65.5kB), run=14275-14275msec

Signed-off-by: Kairui Song <kasong@tencent.com>
---
 mm/filemap.c | 124 +++++++++++++++++++++++++++------------------------
 1 file changed, 65 insertions(+), 59 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 90b86f22a9df..0ccdc9e92764 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -848,38 +848,23 @@ void replace_page_cache_folio(struct folio *old, struct folio *new)
 }
 EXPORT_SYMBOL_GPL(replace_page_cache_folio);
 
-noinline int __filemap_add_folio(struct address_space *mapping,
-		struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp)
+static int __filemap_lock_store(struct xa_state *xas, struct folio *folio,
+				  pgoff_t index, gfp_t gfp, void **shadowp)
 {
-	XA_STATE(xas, &mapping->i_pages, index);
-	void *alloced_shadow = NULL;
-	int alloced_order = 0;
-	bool huge;
-	long nr;
-
-	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
-	VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
-	mapping_set_update(&xas, mapping);
-
-	VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
-	xas_set_order(&xas, index, folio_order(folio));
-	huge = folio_test_hugetlb(folio);
-	nr = folio_nr_pages(folio);
-
+	void *entry, *old, *alloced_shadow = NULL;
+	int order, split_order, alloced_order = 0;
 	gfp &= GFP_RECLAIM_MASK;
-	folio_ref_add(folio, nr);
-	folio->mapping = mapping;
-	folio->index = xas.xa_index;
 
 	for (;;) {
-		int order = -1, split_order = 0;
-		void *entry, *old = NULL;
+		order = -1;
+		split_order = 0;
+		old = NULL;
 
-		xas_lock_irq(&xas);
-		xas_for_each_conflict(&xas, entry) {
+		xas_lock_irq(xas);
+		xas_for_each_conflict(xas, entry) {
 			old = entry;
 			if (!xa_is_value(entry)) {
-				xas_set_err(&xas, -EEXIST);
+				xas_set_err(xas, -EEXIST);
 				goto unlock;
 			}
 			/*
@@ -887,72 +872,93 @@ noinline int __filemap_add_folio(struct address_space *mapping,
 			 * it will be the first and only entry iterated.
 			 */
 			if (order == -1)
-				order = xas_get_order(&xas);
+				order = xas_get_order(xas);
 		}
 
 		/* entry may have changed before we re-acquire the lock */
 		if (alloced_order && (old != alloced_shadow || order != alloced_order)) {
-			xas_destroy(&xas);
+			xas_destroy(xas);
 			alloced_order = 0;
 		}
 
 		if (old) {
 			if (order > 0 && order > folio_order(folio)) {
-				/* How to handle large swap entries? */
-				BUG_ON(shmem_mapping(mapping));
 				if (!alloced_order) {
 					split_order = order;
 					goto unlock;
 				}
-				xas_split(&xas, old, order);
-				xas_reset(&xas);
+				xas_split(xas, old, order);
+				xas_reset(xas);
 			}
 			if (shadowp)
 				*shadowp = old;
 		}
 
-		xas_store(&xas, folio);
-		if (xas_error(&xas))
-			goto unlock;
-
-		mapping->nrpages += nr;
-
-		/* hugetlb pages do not participate in page cache accounting */
-		if (!huge) {
-			__lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
-			if (folio_test_pmd_mappable(folio))
-				__lruvec_stat_mod_folio(folio,
-						NR_FILE_THPS, nr);
-		}
-
+		xas_store(xas, folio);
+		if (!xas_error(xas))
+			return 0;
 unlock:
-		xas_unlock_irq(&xas);
+		xas_unlock_irq(xas);
 
 		/* split needed, alloc here and retry. */
 		if (split_order) {
-			xas_split_alloc(&xas, old, split_order, gfp);
-			if (xas_error(&xas))
+			xas_split_alloc(xas, old, split_order, gfp);
+			if (xas_error(xas))
 				goto error;
 			alloced_shadow = old;
 			alloced_order = split_order;
-			xas_reset(&xas);
+			xas_reset(xas);
 			continue;
 		}
 
-		if (!xas_nomem(&xas, gfp))
+		if (!xas_nomem(xas, gfp))
 			break;
 	}
 
-	if (xas_error(&xas))
-		goto error;
-
-	trace_mm_filemap_add_to_page_cache(folio);
-	return 0;
 error:
-	folio->mapping = NULL;
-	/* Leave page->index set: truncation relies upon it */
-	folio_put_refs(folio, nr);
-	return xas_error(&xas);
+	return xas_error(xas);
+}
+
+noinline int __filemap_add_folio(struct address_space *mapping,
+		struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp)
+{
+	XA_STATE(xas, &mapping->i_pages, index);
+	bool huge;
+	long nr;
+	int ret;
+
+	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+	VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
+	mapping_set_update(&xas, mapping);
+
+	VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
+	xas_set_order(&xas, index, folio_order(folio));
+	huge = folio_test_hugetlb(folio);
+	nr = folio_nr_pages(folio);
+
+	folio_ref_add(folio, nr);
+	folio->mapping = mapping;
+	folio->index = xas.xa_index;
+
+	ret = __filemap_lock_store(&xas, folio, index, gfp, shadowp);
+	if (!ret) {
+		mapping->nrpages += nr;
+		/* hugetlb pages do not participate in page cache accounting */
+		if (!huge) {
+			__lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
+			if (folio_test_pmd_mappable(folio))
+				__lruvec_stat_mod_folio(folio,
+						NR_FILE_THPS, nr);
+		}
+		xas_unlock_irq(&xas);
+		trace_mm_filemap_add_to_page_cache(folio);
+	} else {
+		folio->mapping = NULL;
+		/* Leave page->index set: truncation relies upon it */
+		folio_put_refs(folio, nr);
+	}
+
+	return ret;
 }
 ALLOW_ERROR_INJECTION(__filemap_add_folio, ERRNO);
 
-- 
2.43.0



  reply	other threads:[~2024-03-26 19:04 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-26 18:50 [RFC PATCH 00/10] mm/swap: always use swap cache for synchronization Kairui Song
2024-03-26 18:50 ` Kairui Song [this message]
2024-03-26 18:50 ` [RFC PATCH 02/10] mm/swap: move no readahead swapin code to a stand-alone helper Kairui Song
2024-03-26 18:50 ` [RFC PATCH 03/10] mm/swap: convert swapin_readahead to return a folio Kairui Song
2024-03-26 20:03   ` Matthew Wilcox
2024-03-26 18:50 ` [RFC PATCH 04/10] mm/swap: remove cache bypass swapin Kairui Song
2024-03-27  6:30   ` Huang, Ying
2024-03-27  6:55     ` Kairui Song
2024-03-27  7:29       ` Huang, Ying
2024-03-26 18:50 ` [RFC PATCH 05/10] mm/swap: clean shadow only in unmap path Kairui Song
2024-03-26 18:50 ` [RFC PATCH 06/10] mm/swap: switch to use multi index entries Kairui Song
2024-03-26 18:50 ` [RFC PATCH 07/10] mm/swap: rename __read_swap_cache_async to swap_cache_alloc_or_get Kairui Song
2024-03-26 18:50 ` [RFC PATCH 08/10] mm/swap: use swap cache as a synchronization layer Kairui Song
2024-03-26 18:50 ` [RFC PATCH 09/10] mm/swap: delay the swap cache lookup for swapin Kairui Song
2024-03-26 18:50 ` [RFC PATCH 10/10] mm/swap: optimize synchronous swapin Kairui Song
2024-03-27  6:22   ` Huang, Ying
2024-03-27  6:37     ` Kairui Song
2024-03-27  6:47       ` Huang, Ying
2024-03-27  7:14         ` Kairui Song
2024-03-27  8:16           ` Huang, Ying
2024-03-27  8:08   ` Barry Song
2024-03-27  8:44     ` Kairui Song
2024-03-27  2:52 ` [RFC PATCH 00/10] mm/swap: always use swap cache for synchronization Huang, Ying
2024-03-27  3:01   ` Kairui Song
2024-03-27  8:27     ` Ryan Roberts
2024-03-27  8:32       ` Huang, Ying
2024-03-27  9:39         ` Ryan Roberts
2024-03-27 11:04       ` Kairui Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240326185032.72159-2-ryncsn@gmail.com \
    --to=ryncsn@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=chrisl@kernel.org \
    --cc=david@redhat.com \
    --cc=hannes@cmpxchg.org \
    --cc=kasong@tencent.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=minchan@kernel.org \
    --cc=nphamcs@gmail.com \
    --cc=ryan.roberts@arm.com \
    --cc=sj@kernel.org \
    --cc=v-songbaohua@oppo.com \
    --cc=willy@infradead.org \
    --cc=ying.huang@intel.com \
    --cc=yosryahmed@google.com \
    --cc=yuzhao@google.com \
    --cc=zhouchengming@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox