From: Kairui Song <ryncsn@gmail.com>
To: linux-mm@kvack.org
Cc: "Huang, Ying" <ying.huang@intel.com>,
Chris Li <chrisl@kernel.org>, Minchan Kim <minchan@kernel.org>,
Barry Song <v-songbaohua@oppo.com>,
Ryan Roberts <ryan.roberts@arm.com>, Yu Zhao <yuzhao@google.com>,
SeongJae Park <sj@kernel.org>,
David Hildenbrand <david@redhat.com>,
Yosry Ahmed <yosryahmed@google.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Matthew Wilcox <willy@infradead.org>,
Nhat Pham <nphamcs@gmail.com>,
Chengming Zhou <zhouchengming@bytedance.com>,
Andrew Morton <akpm@linux-foundation.org>,
linux-kernel@vger.kernel.org, Kairui Song <kasong@tencent.com>
Subject: [RFC PATCH 01/10] mm/filemap: split filemap storing logic into a standalone helper
Date: Wed, 27 Mar 2024 02:50:23 +0800 [thread overview]
Message-ID: <20240326185032.72159-2-ryncsn@gmail.com> (raw)
In-Reply-To: <20240326185032.72159-1-ryncsn@gmail.com>
From: Kairui Song <kasong@tencent.com>
The swap cache can reuse this part for multi-index support. No performance
change is observed from the page cache side except noise:
Tested in an 8G memory cgroup with a 16G brd ramdisk.
echo 3 > /proc/sys/vm/drop_caches
fio -name=cached --numjobs=16 --filename=/mnt/test.img \
--buffered=1 --ioengine=mmap --rw=randread --time_based \
--ramp_time=30s --runtime=5m --group_reporting
Before:
bw ( MiB/s): min= 493, max= 3947, per=100.00%, avg=2625.56, stdev=25.74, samples=8651
iops : min=126454, max=1010681, avg=672142.61, stdev=6590.48, samples=8651
After:
bw ( MiB/s): min= 298, max= 3840, per=100.00%, avg=2614.34, stdev=23.77, samples=8689
iops : min=76464, max=983045, avg=669270.35, stdev=6084.31, samples=8689
Test result with THP (do a THP randread, then switch to 4K pages in the
hope that it issues a lot of splitting):
echo 3 > /proc/sys/vm/drop_caches
fio -name=cached --numjobs=16 --filename=/mnt/test.img \
--buffered=1 --ioengine=mmap -thp=1 --readonly \
--rw=randread --time_based --ramp_time=30s --runtime=10m \
--group_reporting
fio -name=cached --numjobs=16 --filename=/mnt/test.img \
--buffered=1 --ioengine=mmap \
--rw=randread --time_based --runtime=5s --group_reporting
Before:
bw ( KiB/s): min= 4611, max=15370, per=100.00%, avg=8928.74, stdev=105.17, samples=19146
iops : min= 1151, max= 3842, avg=2231.27, stdev=26.29, samples=19146
READ: bw=4635B/s (4635B/s), 4635B/s-4635B/s (4635B/s-4635B/s), io=64.0KiB (65.5kB), run=14137-14137msec
After:
bw ( KiB/s): min= 4691, max=15666, per=100.00%, avg=8890.30, stdev=104.53, samples=19056
iops : min= 1167, max= 3913, avg=2218.68, stdev=26.15, samples=19056
READ: bw=4590B/s (4590B/s), 4590B/s-4590B/s (4590B/s-4590B/s), io=64.0KiB (65.5kB), run=14275-14275msec
Signed-off-by: Kairui Song <kasong@tencent.com>
---
mm/filemap.c | 124 +++++++++++++++++++++++++++------------------------
1 file changed, 65 insertions(+), 59 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 90b86f22a9df..0ccdc9e92764 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -848,38 +848,23 @@ void replace_page_cache_folio(struct folio *old, struct folio *new)
}
EXPORT_SYMBOL_GPL(replace_page_cache_folio);
-noinline int __filemap_add_folio(struct address_space *mapping,
- struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp)
+static int __filemap_lock_store(struct xa_state *xas, struct folio *folio,
+ pgoff_t index, gfp_t gfp, void **shadowp)
{
- XA_STATE(xas, &mapping->i_pages, index);
- void *alloced_shadow = NULL;
- int alloced_order = 0;
- bool huge;
- long nr;
-
- VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
- VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
- mapping_set_update(&xas, mapping);
-
- VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
- xas_set_order(&xas, index, folio_order(folio));
- huge = folio_test_hugetlb(folio);
- nr = folio_nr_pages(folio);
-
+ void *entry, *old, *alloced_shadow = NULL;
+ int order, split_order, alloced_order = 0;
gfp &= GFP_RECLAIM_MASK;
- folio_ref_add(folio, nr);
- folio->mapping = mapping;
- folio->index = xas.xa_index;
for (;;) {
- int order = -1, split_order = 0;
- void *entry, *old = NULL;
+ order = -1;
+ split_order = 0;
+ old = NULL;
- xas_lock_irq(&xas);
- xas_for_each_conflict(&xas, entry) {
+ xas_lock_irq(xas);
+ xas_for_each_conflict(xas, entry) {
old = entry;
if (!xa_is_value(entry)) {
- xas_set_err(&xas, -EEXIST);
+ xas_set_err(xas, -EEXIST);
goto unlock;
}
/*
@@ -887,72 +872,93 @@ noinline int __filemap_add_folio(struct address_space *mapping,
* it will be the first and only entry iterated.
*/
if (order == -1)
- order = xas_get_order(&xas);
+ order = xas_get_order(xas);
}
/* entry may have changed before we re-acquire the lock */
if (alloced_order && (old != alloced_shadow || order != alloced_order)) {
- xas_destroy(&xas);
+ xas_destroy(xas);
alloced_order = 0;
}
if (old) {
if (order > 0 && order > folio_order(folio)) {
- /* How to handle large swap entries? */
- BUG_ON(shmem_mapping(mapping));
if (!alloced_order) {
split_order = order;
goto unlock;
}
- xas_split(&xas, old, order);
- xas_reset(&xas);
+ xas_split(xas, old, order);
+ xas_reset(xas);
}
if (shadowp)
*shadowp = old;
}
- xas_store(&xas, folio);
- if (xas_error(&xas))
- goto unlock;
-
- mapping->nrpages += nr;
-
- /* hugetlb pages do not participate in page cache accounting */
- if (!huge) {
- __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
- if (folio_test_pmd_mappable(folio))
- __lruvec_stat_mod_folio(folio,
- NR_FILE_THPS, nr);
- }
-
+ xas_store(xas, folio);
+ if (!xas_error(xas))
+ return 0;
unlock:
- xas_unlock_irq(&xas);
+ xas_unlock_irq(xas);
/* split needed, alloc here and retry. */
if (split_order) {
- xas_split_alloc(&xas, old, split_order, gfp);
- if (xas_error(&xas))
+ xas_split_alloc(xas, old, split_order, gfp);
+ if (xas_error(xas))
goto error;
alloced_shadow = old;
alloced_order = split_order;
- xas_reset(&xas);
+ xas_reset(xas);
continue;
}
- if (!xas_nomem(&xas, gfp))
+ if (!xas_nomem(xas, gfp))
break;
}
- if (xas_error(&xas))
- goto error;
-
- trace_mm_filemap_add_to_page_cache(folio);
- return 0;
error:
- folio->mapping = NULL;
- /* Leave page->index set: truncation relies upon it */
- folio_put_refs(folio, nr);
- return xas_error(&xas);
+ return xas_error(xas);
+}
+
+noinline int __filemap_add_folio(struct address_space *mapping,
+ struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp)
+{
+ XA_STATE(xas, &mapping->i_pages, index);
+ bool huge;
+ long nr;
+ int ret;
+
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+ VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
+ mapping_set_update(&xas, mapping);
+
+ VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
+ xas_set_order(&xas, index, folio_order(folio));
+ huge = folio_test_hugetlb(folio);
+ nr = folio_nr_pages(folio);
+
+ folio_ref_add(folio, nr);
+ folio->mapping = mapping;
+ folio->index = xas.xa_index;
+
+ ret = __filemap_lock_store(&xas, folio, index, gfp, shadowp);
+ if (!ret) {
+ mapping->nrpages += nr;
+ /* hugetlb pages do not participate in page cache accounting */
+ if (!huge) {
+ __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
+ if (folio_test_pmd_mappable(folio))
+ __lruvec_stat_mod_folio(folio,
+ NR_FILE_THPS, nr);
+ }
+ xas_unlock_irq(&xas);
+ trace_mm_filemap_add_to_page_cache(folio);
+ } else {
+ folio->mapping = NULL;
+ /* Leave page->index set: truncation relies upon it */
+ folio_put_refs(folio, nr);
+ }
+
+ return ret;
}
ALLOW_ERROR_INJECTION(__filemap_add_folio, ERRNO);
--
2.43.0
next prev parent reply other threads:[~2024-03-26 19:04 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-03-26 18:50 [RFC PATCH 00/10] mm/swap: always use swap cache for synchronization Kairui Song
2024-03-26 18:50 ` Kairui Song [this message]
2024-03-26 18:50 ` [RFC PATCH 02/10] mm/swap: move no readahead swapin code to a stand-alone helper Kairui Song
2024-03-26 18:50 ` [RFC PATCH 03/10] mm/swap: convert swapin_readahead to return a folio Kairui Song
2024-03-26 20:03 ` Matthew Wilcox
2024-03-26 18:50 ` [RFC PATCH 04/10] mm/swap: remove cache bypass swapin Kairui Song
2024-03-27 6:30 ` Huang, Ying
2024-03-27 6:55 ` Kairui Song
2024-03-27 7:29 ` Huang, Ying
2024-03-26 18:50 ` [RFC PATCH 05/10] mm/swap: clean shadow only in unmap path Kairui Song
2024-03-26 18:50 ` [RFC PATCH 06/10] mm/swap: switch to use multi index entries Kairui Song
2024-03-26 18:50 ` [RFC PATCH 07/10] mm/swap: rename __read_swap_cache_async to swap_cache_alloc_or_get Kairui Song
2024-03-26 18:50 ` [RFC PATCH 08/10] mm/swap: use swap cache as a synchronization layer Kairui Song
2024-03-26 18:50 ` [RFC PATCH 09/10] mm/swap: delay the swap cache lookup for swapin Kairui Song
2024-03-26 18:50 ` [RFC PATCH 10/10] mm/swap: optimize synchronous swapin Kairui Song
2024-03-27 6:22 ` Huang, Ying
2024-03-27 6:37 ` Kairui Song
2024-03-27 6:47 ` Huang, Ying
2024-03-27 7:14 ` Kairui Song
2024-03-27 8:16 ` Huang, Ying
2024-03-27 8:08 ` Barry Song
2024-03-27 8:44 ` Kairui Song
2024-03-27 2:52 ` [RFC PATCH 00/10] mm/swap: always use swap cache for synchronization Huang, Ying
2024-03-27 3:01 ` Kairui Song
2024-03-27 8:27 ` Ryan Roberts
2024-03-27 8:32 ` Huang, Ying
2024-03-27 9:39 ` Ryan Roberts
2024-03-27 11:04 ` Kairui Song
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240326185032.72159-2-ryncsn@gmail.com \
--to=ryncsn@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=chrisl@kernel.org \
--cc=david@redhat.com \
--cc=hannes@cmpxchg.org \
--cc=kasong@tencent.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=minchan@kernel.org \
--cc=nphamcs@gmail.com \
--cc=ryan.roberts@arm.com \
--cc=sj@kernel.org \
--cc=v-songbaohua@oppo.com \
--cc=willy@infradead.org \
--cc=ying.huang@intel.com \
--cc=yosryahmed@google.com \
--cc=yuzhao@google.com \
--cc=zhouchengming@bytedance.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox