From: Kairui Song <ryncsn@gmail.com>
To: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
Chris Li <chrisl@kernel.org>, Barry Song <v-songbaohua@oppo.com>,
Ryan Roberts <ryan.roberts@arm.com>,
Hugh Dickins <hughd@google.com>,
Yosry Ahmed <yosryahmed@google.com>,
"Huang, Ying" <ying.huang@linux.alibaba.com>,
Nhat Pham <nphamcs@gmail.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Kalesh Singh <kaleshsingh@google.com>,
linux-kernel@vger.kernel.org, Kairui Song <kasong@tencent.com>
Subject: [PATCH v3 04/13] mm, swap: use cluster lock for HDD
Date: Tue, 31 Dec 2024 01:46:12 +0800 [thread overview]
Message-ID: <20241230174621.61185-5-ryncsn@gmail.com> (raw)
In-Reply-To: <20241230174621.61185-1-ryncsn@gmail.com>
From: Kairui Song <kasong@tencent.com>
Cluster lock (ci->lock) was introduce to reduce contention for certain
operations. Using cluster lock for HDD is not helpful as HDD have a poor
performance, so locking isn't the bottleneck. But having different set
of locks for HDD / non-HDD prevents further rework of device lock
(si->lock).
This commit just changed all lock_cluster_or_swap_info to lock_cluster,
which is a safe and straight conversion since cluster info is always
allocated now, also removed all cluster_info related checks.
Suggested-by: Chris Li <chrisl@kernel.org>
Signed-off-by: Kairui Song <kasong@tencent.com>
---
mm/swapfile.c | 107 ++++++++++++++++----------------------------------
1 file changed, 34 insertions(+), 73 deletions(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index fca58d43b836..d0e5b9fa0c48 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -58,10 +58,9 @@ static void swap_entry_range_free(struct swap_info_struct *si, swp_entry_t entry
static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset,
unsigned int nr_entries);
static bool folio_swapcache_freeable(struct folio *folio);
-static struct swap_cluster_info *lock_cluster_or_swap_info(
- struct swap_info_struct *si, unsigned long offset);
-static void unlock_cluster_or_swap_info(struct swap_info_struct *si,
- struct swap_cluster_info *ci);
+static struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
+ unsigned long offset);
+static void unlock_cluster(struct swap_cluster_info *ci);
static DEFINE_SPINLOCK(swap_lock);
static unsigned int nr_swapfiles;
@@ -222,9 +221,9 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si,
* swap_map is HAS_CACHE only, which means the slots have no page table
* reference or pending writeback, and can't be allocated to others.
*/
- ci = lock_cluster_or_swap_info(si, offset);
+ ci = lock_cluster(si, offset);
need_reclaim = swap_is_has_cache(si, offset, nr_pages);
- unlock_cluster_or_swap_info(si, ci);
+ unlock_cluster(ci);
if (!need_reclaim)
goto out_unlock;
@@ -404,45 +403,15 @@ static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si
{
struct swap_cluster_info *ci;
- ci = si->cluster_info;
- if (ci) {
- ci += offset / SWAPFILE_CLUSTER;
- spin_lock(&ci->lock);
- }
- return ci;
-}
-
-static inline void unlock_cluster(struct swap_cluster_info *ci)
-{
- if (ci)
- spin_unlock(&ci->lock);
-}
-
-/*
- * Determine the locking method in use for this device. Return
- * swap_cluster_info if SSD-style cluster-based locking is in place.
- */
-static inline struct swap_cluster_info *lock_cluster_or_swap_info(
- struct swap_info_struct *si, unsigned long offset)
-{
- struct swap_cluster_info *ci;
-
- /* Try to use fine-grained SSD-style locking if available: */
- ci = lock_cluster(si, offset);
- /* Otherwise, fall back to traditional, coarse locking: */
- if (!ci)
- spin_lock(&si->lock);
+ ci = &si->cluster_info[offset / SWAPFILE_CLUSTER];
+ spin_lock(&ci->lock);
return ci;
}
-static inline void unlock_cluster_or_swap_info(struct swap_info_struct *si,
- struct swap_cluster_info *ci)
+static inline void unlock_cluster(struct swap_cluster_info *ci)
{
- if (ci)
- unlock_cluster(ci);
- else
- spin_unlock(&si->lock);
+ spin_unlock(&ci->lock);
}
/* Add a cluster to discard list and schedule it to do discard */
@@ -558,9 +527,6 @@ static void inc_cluster_info_page(struct swap_info_struct *si,
unsigned long idx = page_nr / SWAPFILE_CLUSTER;
struct swap_cluster_info *ci;
- if (!cluster_info)
- return;
-
ci = cluster_info + idx;
ci->count++;
@@ -576,9 +542,6 @@ static void inc_cluster_info_page(struct swap_info_struct *si,
static void dec_cluster_info_page(struct swap_info_struct *si,
struct swap_cluster_info *ci, int nr_pages)
{
- if (!si->cluster_info)
- return;
-
VM_BUG_ON(ci->count < nr_pages);
VM_BUG_ON(cluster_is_free(ci));
lockdep_assert_held(&si->lock);
@@ -1007,8 +970,6 @@ static int cluster_alloc_swap(struct swap_info_struct *si,
{
int n_ret = 0;
- VM_BUG_ON(!si->cluster_info);
-
si->flags += SWP_SCANNING;
while (n_ret < nr) {
@@ -1052,10 +1013,10 @@ static int scan_swap_map_slots(struct swap_info_struct *si,
}
/*
- * Swapfile is not block device or not using clusters so unable
+ * Swapfile is not block device so unable
* to allocate large entries.
*/
- if (!(si->flags & SWP_BLKDEV) || !si->cluster_info)
+ if (!(si->flags & SWP_BLKDEV))
return 0;
}
@@ -1295,9 +1256,9 @@ static unsigned char __swap_entry_free(struct swap_info_struct *si,
unsigned long offset = swp_offset(entry);
unsigned char usage;
- ci = lock_cluster_or_swap_info(si, offset);
+ ci = lock_cluster(si, offset);
usage = __swap_entry_free_locked(si, offset, 1);
- unlock_cluster_or_swap_info(si, ci);
+ unlock_cluster(ci);
if (!usage)
free_swap_slot(entry);
@@ -1320,14 +1281,14 @@ static bool __swap_entries_free(struct swap_info_struct *si,
if (nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER)
goto fallback;
- ci = lock_cluster_or_swap_info(si, offset);
+ ci = lock_cluster(si, offset);
if (!swap_is_last_map(si, offset, nr, &has_cache)) {
- unlock_cluster_or_swap_info(si, ci);
+ unlock_cluster(ci);
goto fallback;
}
for (i = 0; i < nr; i++)
WRITE_ONCE(si->swap_map[offset + i], SWAP_HAS_CACHE);
- unlock_cluster_or_swap_info(si, ci);
+ unlock_cluster(ci);
if (!has_cache) {
for (i = 0; i < nr; i++)
@@ -1383,7 +1344,7 @@ static void cluster_swap_free_nr(struct swap_info_struct *si,
DECLARE_BITMAP(to_free, BITS_PER_LONG) = { 0 };
int i, nr;
- ci = lock_cluster_or_swap_info(si, offset);
+ ci = lock_cluster(si, offset);
while (nr_pages) {
nr = min(BITS_PER_LONG, nr_pages);
for (i = 0; i < nr; i++) {
@@ -1391,18 +1352,18 @@ static void cluster_swap_free_nr(struct swap_info_struct *si,
bitmap_set(to_free, i, 1);
}
if (!bitmap_empty(to_free, BITS_PER_LONG)) {
- unlock_cluster_or_swap_info(si, ci);
+ unlock_cluster(ci);
for_each_set_bit(i, to_free, BITS_PER_LONG)
free_swap_slot(swp_entry(si->type, offset + i));
if (nr == nr_pages)
return;
bitmap_clear(to_free, 0, BITS_PER_LONG);
- ci = lock_cluster_or_swap_info(si, offset);
+ ci = lock_cluster(si, offset);
}
offset += nr;
nr_pages -= nr;
}
- unlock_cluster_or_swap_info(si, ci);
+ unlock_cluster(ci);
}
/*
@@ -1441,9 +1402,9 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry)
if (!si)
return;
- ci = lock_cluster_or_swap_info(si, offset);
+ ci = lock_cluster(si, offset);
if (size > 1 && swap_is_has_cache(si, offset, size)) {
- unlock_cluster_or_swap_info(si, ci);
+ unlock_cluster(ci);
spin_lock(&si->lock);
swap_entry_range_free(si, entry, size);
spin_unlock(&si->lock);
@@ -1451,14 +1412,14 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry)
}
for (int i = 0; i < size; i++, entry.val++) {
if (!__swap_entry_free_locked(si, offset + i, SWAP_HAS_CACHE)) {
- unlock_cluster_or_swap_info(si, ci);
+ unlock_cluster(ci);
free_swap_slot(entry);
if (i == size - 1)
return;
- lock_cluster_or_swap_info(si, offset);
+ lock_cluster(si, offset);
}
}
- unlock_cluster_or_swap_info(si, ci);
+ unlock_cluster(ci);
}
static int swp_entry_cmp(const void *ent1, const void *ent2)
@@ -1522,9 +1483,9 @@ int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry)
struct swap_cluster_info *ci;
int count;
- ci = lock_cluster_or_swap_info(si, offset);
+ ci = lock_cluster(si, offset);
count = swap_count(si->swap_map[offset]);
- unlock_cluster_or_swap_info(si, ci);
+ unlock_cluster(ci);
return count;
}
@@ -1547,7 +1508,7 @@ int swp_swapcount(swp_entry_t entry)
offset = swp_offset(entry);
- ci = lock_cluster_or_swap_info(si, offset);
+ ci = lock_cluster(si, offset);
count = swap_count(si->swap_map[offset]);
if (!(count & COUNT_CONTINUED))
@@ -1570,7 +1531,7 @@ int swp_swapcount(swp_entry_t entry)
n *= (SWAP_CONT_MAX + 1);
} while (tmp_count & COUNT_CONTINUED);
out:
- unlock_cluster_or_swap_info(si, ci);
+ unlock_cluster(ci);
return count;
}
@@ -1585,8 +1546,8 @@ static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,
int i;
bool ret = false;
- ci = lock_cluster_or_swap_info(si, offset);
- if (!ci || nr_pages == 1) {
+ ci = lock_cluster(si, offset);
+ if (nr_pages == 1) {
if (swap_count(map[roffset]))
ret = true;
goto unlock_out;
@@ -1598,7 +1559,7 @@ static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,
}
}
unlock_out:
- unlock_cluster_or_swap_info(si, ci);
+ unlock_cluster(ci);
return ret;
}
@@ -3428,7 +3389,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
offset = swp_offset(entry);
VM_WARN_ON(nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER);
VM_WARN_ON(usage == 1 && nr > 1);
- ci = lock_cluster_or_swap_info(si, offset);
+ ci = lock_cluster(si, offset);
err = 0;
for (i = 0; i < nr; i++) {
@@ -3483,7 +3444,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
}
unlock_out:
- unlock_cluster_or_swap_info(si, ci);
+ unlock_cluster(ci);
return err;
}
--
2.47.1
next prev parent reply other threads:[~2024-12-30 17:46 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-30 17:46 [PATCH v3 00/13] mm, swap: rework of swap allocator locks Kairui Song
2024-12-30 17:46 ` [PATCH v3 01/13] mm, swap: minor clean up for swap entry allocation Kairui Song
2025-01-09 4:04 ` Baoquan He
2024-12-30 17:46 ` [PATCH v3 02/13] mm, swap: fold swap_info_get_cont in the only caller Kairui Song
2025-01-09 4:05 ` Baoquan He
2024-12-30 17:46 ` [PATCH v3 03/13] mm, swap: remove old allocation path for HDD Kairui Song
2025-01-09 4:06 ` Baoquan He
2024-12-30 17:46 ` Kairui Song [this message]
2025-01-09 4:07 ` [PATCH v3 04/13] mm, swap: use cluster lock " Baoquan He
2024-12-30 17:46 ` [PATCH v3 05/13] mm, swap: clean up device availability check Kairui Song
2025-01-09 4:08 ` Baoquan He
2024-12-30 17:46 ` [PATCH v3 06/13] mm, swap: clean up plist removal and adding Kairui Song
2025-01-02 8:59 ` Baoquan He
2025-01-03 8:07 ` Kairui Song
2024-12-30 17:46 ` [PATCH v3 07/13] mm, swap: hold a reference during scan and cleanup flag usage Kairui Song
2025-01-04 5:46 ` Baoquan He
2025-01-13 5:34 ` Kairui Song
2025-01-20 2:39 ` Baoquan He
2025-01-27 9:19 ` Kairui Song
2025-02-05 9:18 ` Baoquan He
2024-12-30 17:46 ` [PATCH v3 08/13] mm, swap: use an enum to define all cluster flags and wrap flags changes Kairui Song
2025-01-06 8:43 ` Baoquan He
2025-01-13 5:49 ` Kairui Song
2024-12-30 17:46 ` [PATCH v3 09/13] mm, swap: reduce contention on device lock Kairui Song
2025-01-06 10:12 ` Baoquan He
2025-01-08 11:09 ` Baoquan He
2025-01-09 2:15 ` Kairui Song
2025-01-10 11:23 ` Baoquan He
2025-01-13 6:33 ` Kairui Song
2025-01-13 8:07 ` Kairui Song
2024-12-30 17:46 ` [PATCH v3 10/13] mm, swap: simplify percpu cluster updating Kairui Song
2025-01-09 2:07 ` Baoquan He
2024-12-30 17:46 ` [PATCH v3 11/13] mm, swap: introduce a helper for retrieving cluster from offset Kairui Song
2024-12-30 17:46 ` [PATCH v3 12/13] mm, swap: use a global swap cluster for non-rotation devices Kairui Song
2024-12-30 17:46 ` [PATCH v3 13/13] mm, swap_slots: remove slot cache for freeing path Kairui Song
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241230174621.61185-5-ryncsn@gmail.com \
--to=ryncsn@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=chrisl@kernel.org \
--cc=hannes@cmpxchg.org \
--cc=hughd@google.com \
--cc=kaleshsingh@google.com \
--cc=kasong@tencent.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=nphamcs@gmail.com \
--cc=ryan.roberts@arm.com \
--cc=v-songbaohua@oppo.com \
--cc=ying.huang@linux.alibaba.com \
--cc=yosryahmed@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox