From: Kairui Song <ryncsn@gmail.com>
To: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
Matthew Wilcox <willy@infradead.org>,
Hugh Dickins <hughd@google.com>, Chris Li <chrisl@kernel.org>,
David Hildenbrand <david@redhat.com>,
Yosry Ahmed <yosryahmed@google.com>,
"Huang, Ying" <ying.huang@linux.alibaba.com>,
Nhat Pham <nphamcs@gmail.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Baolin Wang <baolin.wang@linux.alibaba.com>,
Baoquan He <bhe@redhat.com>, Barry Song <baohua@kernel.org>,
Kalesh Singh <kaleshsingh@google.com>,
Kemeng Shi <shikemeng@huaweicloud.com>,
Tim Chen <tim.c.chen@linux.intel.com>,
Ryan Roberts <ryan.roberts@arm.com>,
linux-kernel@vger.kernel.org, Kairui Song <kasong@tencent.com>
Subject: [PATCH 06/28] mm, swap: rearrange swap cluster definition and helpers
Date: Thu, 15 May 2025 04:17:06 +0800 [thread overview]
Message-ID: <20250514201729.48420-7-ryncsn@gmail.com> (raw)
In-Reply-To: <20250514201729.48420-1-ryncsn@gmail.com>
From: Kairui Song <kasong@tencent.com>
No feature change, move all cluster related definition and helpers to
mm/swap.h, also tidy up and add a "swap_" prefix for all cluster
lock/unlock helpers, so they can be better used outside of swap files.
Signed-off-by: Kairui Song <kasong@tencent.com>
---
include/linux/swap.h | 34 ---------------
mm/swap.h | 62 ++++++++++++++++++++++++++
mm/swapfile.c | 102 +++++++++++++------------------------------
3 files changed, 92 insertions(+), 106 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0e52ac4e817d..1e7d9d55c39a 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -234,40 +234,6 @@ enum {
/* Special value in each swap_map continuation */
#define SWAP_CONT_MAX 0x7f /* Max count */
-/*
- * We use this to track usage of a cluster. A cluster is a block of swap disk
- * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All
- * free clusters are organized into a list. We fetch an entry from the list to
- * get a free cluster.
- *
- * The flags field determines if a cluster is free. This is
- * protected by cluster lock.
- */
-struct swap_cluster_info {
- spinlock_t lock; /*
- * Protect swap_cluster_info fields
- * other than list, and swap_info_struct->swap_map
- * elements corresponding to the swap cluster.
- */
- u16 count;
- u8 flags;
- u8 order;
- struct list_head list;
-};
-
-/* All on-list cluster must have a non-zero flag. */
-enum swap_cluster_flags {
- CLUSTER_FLAG_NONE = 0, /* For temporary off-list cluster */
- CLUSTER_FLAG_FREE,
- CLUSTER_FLAG_NONFULL,
- CLUSTER_FLAG_FRAG,
- /* Clusters with flags above are allocatable */
- CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG,
- CLUSTER_FLAG_FULL,
- CLUSTER_FLAG_DISCARD,
- CLUSTER_FLAG_MAX,
-};
-
/*
* The first page in the swap file is the swap header, which is always marked
* bad to prevent it from being allocated as an entry. This also prevents the
diff --git a/mm/swap.h b/mm/swap.h
index 34af06bf6fa4..38d37d241f1c 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -5,10 +5,72 @@
struct mempolicy;
extern int page_cluster;
+#ifdef CONFIG_THP_SWAP
+#define SWAPFILE_CLUSTER HPAGE_PMD_NR
+#define swap_entry_order(order) (order)
+#else
+#define SWAPFILE_CLUSTER 256
+#define swap_entry_order(order) 0
+#endif
+
+/*
+ * We use this to track usage of a cluster. A cluster is a block of swap disk
+ * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All
+ * free clusters are organized into a list. We fetch an entry from the list to
+ * get a free cluster.
+ *
+ * The flags field determines if a cluster is free. This is
+ * protected by cluster lock.
+ */
+struct swap_cluster_info {
+ spinlock_t lock; /*
+ * Protect swap_cluster_info fields
+ * other than list, and swap_info_struct->swap_map
+ * elements corresponding to the swap cluster.
+ */
+ u16 count;
+ u8 flags;
+ u8 order;
+ struct list_head list;
+};
+
+/* All on-list cluster must have a non-zero flag. */
+enum swap_cluster_flags {
+ CLUSTER_FLAG_NONE = 0, /* For temporary off-list cluster */
+ CLUSTER_FLAG_FREE,
+ CLUSTER_FLAG_NONFULL,
+ CLUSTER_FLAG_FRAG,
+ /* Clusters with flags above are allocatable */
+ CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG,
+ CLUSTER_FLAG_FULL,
+ CLUSTER_FLAG_DISCARD,
+ CLUSTER_FLAG_MAX,
+};
+
#ifdef CONFIG_SWAP
#include <linux/swapops.h> /* for swp_offset */
#include <linux/blk_types.h> /* for bio_end_io_t */
+static inline struct swap_cluster_info *swp_offset_cluster(
+ struct swap_info_struct *si, pgoff_t offset)
+{
+ return &si->cluster_info[offset / SWAPFILE_CLUSTER];
+}
+
+static inline struct swap_cluster_info *swap_lock_cluster(
+ struct swap_info_struct *si,
+ unsigned long offset)
+{
+ struct swap_cluster_info *ci = swp_offset_cluster(si, offset);
+ spin_lock(&ci->lock);
+ return ci;
+}
+
+static inline void swap_unlock_cluster(struct swap_cluster_info *ci)
+{
+ spin_unlock(&ci->lock);
+}
+
/* linux/mm/page_io.c */
int sio_pool_init(void);
struct swap_iocb;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index aa031fd27847..ba3fd99eb5fa 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -58,9 +58,6 @@ static void swap_entries_free(struct swap_info_struct *si,
static void swap_range_alloc(struct swap_info_struct *si,
unsigned int nr_entries);
static bool folio_swapcache_freeable(struct folio *folio);
-static struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
- unsigned long offset);
-static inline void unlock_cluster(struct swap_cluster_info *ci);
static DEFINE_SPINLOCK(swap_lock);
static unsigned int nr_swapfiles;
@@ -259,9 +256,9 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si,
* swap_map is HAS_CACHE only, which means the slots have no page table
* reference or pending writeback, and can't be allocated to others.
*/
- ci = lock_cluster(si, offset);
+ ci = swap_lock_cluster(si, offset);
need_reclaim = swap_only_has_cache(si, offset, nr_pages);
- unlock_cluster(ci);
+ swap_unlock_cluster(ci);
if (!need_reclaim)
goto out_unlock;
@@ -386,21 +383,6 @@ static void discard_swap_cluster(struct swap_info_struct *si,
}
}
-#ifdef CONFIG_THP_SWAP
-#define SWAPFILE_CLUSTER HPAGE_PMD_NR
-
-#define swap_entry_order(order) (order)
-#else
-#define SWAPFILE_CLUSTER 256
-
-/*
- * Define swap_entry_order() as constant to let compiler to optimize
- * out some code if !CONFIG_THP_SWAP
- */
-#define swap_entry_order(order) 0
-#endif
-#define LATENCY_LIMIT 256
-
static inline bool cluster_is_empty(struct swap_cluster_info *info)
{
return info->count == 0;
@@ -426,34 +408,12 @@ static inline unsigned int cluster_index(struct swap_info_struct *si,
return ci - si->cluster_info;
}
-static inline struct swap_cluster_info *offset_to_cluster(struct swap_info_struct *si,
- unsigned long offset)
-{
- return &si->cluster_info[offset / SWAPFILE_CLUSTER];
-}
-
static inline unsigned int cluster_offset(struct swap_info_struct *si,
struct swap_cluster_info *ci)
{
return cluster_index(si, ci) * SWAPFILE_CLUSTER;
}
-static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
- unsigned long offset)
-{
- struct swap_cluster_info *ci;
-
- ci = offset_to_cluster(si, offset);
- spin_lock(&ci->lock);
-
- return ci;
-}
-
-static inline void unlock_cluster(struct swap_cluster_info *ci)
-{
- spin_unlock(&ci->lock);
-}
-
static void move_cluster(struct swap_info_struct *si,
struct swap_cluster_info *ci, struct list_head *list,
enum swap_cluster_flags new_flags)
@@ -809,7 +769,7 @@ static unsigned int alloc_swap_scan_cluster(struct swap_info_struct *si,
}
out:
relocate_cluster(si, ci);
- unlock_cluster(ci);
+ swap_unlock_cluster(ci);
if (si->flags & SWP_SOLIDSTATE) {
this_cpu_write(percpu_swap_cluster.offset[order], next);
this_cpu_write(percpu_swap_cluster.si[order], si);
@@ -853,7 +813,7 @@ static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force)
if (ci->flags == CLUSTER_FLAG_NONE)
relocate_cluster(si, ci);
- unlock_cluster(ci);
+ swap_unlock_cluster(ci);
if (to_scan <= 0)
break;
}
@@ -889,10 +849,8 @@ static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si, int o
/* Serialize HDD SWAP allocation for each device. */
spin_lock(&si->global_cluster_lock);
offset = si->global_cluster->next[order];
- if (offset == SWAP_ENTRY_INVALID)
- goto new_cluster;
- ci = lock_cluster(si, offset);
+ ci = swap_lock_cluster(si, offset);
/* Cluster could have been used by another order */
if (cluster_is_usable(ci, order)) {
if (cluster_is_empty(ci))
@@ -900,7 +858,7 @@ static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si, int o
found = alloc_swap_scan_cluster(si, ci, offset,
order, usage);
} else {
- unlock_cluster(ci);
+ swap_unlock_cluster(ci);
}
if (found)
goto done;
@@ -1178,7 +1136,7 @@ static bool swap_alloc_fast(swp_entry_t *entry,
if (!si || !offset || !get_swap_device_info(si))
return false;
- ci = lock_cluster(si, offset);
+ ci = swap_lock_cluster(si, offset);
if (cluster_is_usable(ci, order)) {
if (cluster_is_empty(ci))
offset = cluster_offset(si, ci);
@@ -1186,7 +1144,7 @@ static bool swap_alloc_fast(swp_entry_t *entry,
if (found)
*entry = swp_entry(si->type, found);
} else {
- unlock_cluster(ci);
+ swap_unlock_cluster(ci);
}
put_swap_device(si);
@@ -1449,14 +1407,14 @@ static void swap_entries_put_cache(struct swap_info_struct *si,
unsigned long offset = swp_offset(entry);
struct swap_cluster_info *ci;
- ci = lock_cluster(si, offset);
- if (swap_only_has_cache(si, offset, nr))
+ ci = swap_lock_cluster(si, offset);
+ if (swap_only_has_cache(si, offset, nr)) {
swap_entries_free(si, ci, entry, nr);
- else {
+ } else {
for (int i = 0; i < nr; i++, entry.val++)
swap_entry_put_locked(si, ci, entry, SWAP_HAS_CACHE);
}
- unlock_cluster(ci);
+ swap_unlock_cluster(ci);
}
static bool swap_entries_put_map(struct swap_info_struct *si,
@@ -1474,7 +1432,7 @@ static bool swap_entries_put_map(struct swap_info_struct *si,
if (count != 1)
goto fallback;
- ci = lock_cluster(si, offset);
+ ci = swap_lock_cluster(si, offset);
if (!swap_is_last_map(si, offset, nr, &has_cache)) {
goto locked_fallback;
}
@@ -1483,21 +1441,20 @@ static bool swap_entries_put_map(struct swap_info_struct *si,
else
for (i = 0; i < nr; i++)
WRITE_ONCE(si->swap_map[offset + i], SWAP_HAS_CACHE);
- unlock_cluster(ci);
+ swap_unlock_cluster(ci);
return has_cache;
fallback:
- ci = lock_cluster(si, offset);
+ ci = swap_lock_cluster(si, offset);
locked_fallback:
for (i = 0; i < nr; i++, entry.val++) {
count = swap_entry_put_locked(si, ci, entry, 1);
if (count == SWAP_HAS_CACHE)
has_cache = true;
}
- unlock_cluster(ci);
+ swap_unlock_cluster(ci);
return has_cache;
-
}
/*
@@ -1545,7 +1502,7 @@ static void swap_entries_free(struct swap_info_struct *si,
unsigned char *map_end = map + nr_pages;
/* It should never free entries across different clusters */
- VM_BUG_ON(ci != offset_to_cluster(si, offset + nr_pages - 1));
+ VM_BUG_ON(ci != swp_offset_cluster(si, offset + nr_pages - 1));
VM_BUG_ON(cluster_is_empty(ci));
VM_BUG_ON(ci->count < nr_pages);
@@ -1620,9 +1577,9 @@ bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry)
struct swap_cluster_info *ci;
int count;
- ci = lock_cluster(si, offset);
+ ci = swap_lock_cluster(si, offset);
count = swap_count(si->swap_map[offset]);
- unlock_cluster(ci);
+ swap_unlock_cluster(ci);
return !!count;
}
@@ -1645,7 +1602,7 @@ int swp_swapcount(swp_entry_t entry)
offset = swp_offset(entry);
- ci = lock_cluster(si, offset);
+ ci = swap_lock_cluster(si, offset);
count = swap_count(si->swap_map[offset]);
if (!(count & COUNT_CONTINUED))
@@ -1668,7 +1625,7 @@ int swp_swapcount(swp_entry_t entry)
n *= (SWAP_CONT_MAX + 1);
} while (tmp_count & COUNT_CONTINUED);
out:
- unlock_cluster(ci);
+ swap_unlock_cluster(ci);
return count;
}
@@ -1683,7 +1640,7 @@ static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,
int i;
bool ret = false;
- ci = lock_cluster(si, offset);
+ ci = swap_lock_cluster(si, offset);
if (nr_pages == 1) {
if (swap_count(map[roffset]))
ret = true;
@@ -1696,7 +1653,7 @@ static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,
}
}
unlock_out:
- unlock_cluster(ci);
+ swap_unlock_cluster(ci);
return ret;
}
@@ -2246,6 +2203,7 @@ static int unuse_mm(struct mm_struct *mm, unsigned int type)
* Return 0 if there are no inuse entries after prev till end of
* the map.
*/
+#define LATENCY_LIMIT 256
static unsigned int find_next_to_unuse(struct swap_info_struct *si,
unsigned int prev)
{
@@ -2629,8 +2587,8 @@ static void wait_for_allocation(struct swap_info_struct *si)
BUG_ON(si->flags & SWP_WRITEOK);
for (offset = 0; offset < end; offset += SWAPFILE_CLUSTER) {
- ci = lock_cluster(si, offset);
- unlock_cluster(ci);
+ ci = swap_lock_cluster(si, offset);
+ swap_unlock_cluster(ci);
}
}
@@ -3533,7 +3491,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
offset = swp_offset(entry);
VM_WARN_ON(nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER);
- ci = lock_cluster(si, offset);
+ ci = swap_lock_cluster(si, offset);
err = 0;
for (i = 0; i < nr; i++) {
@@ -3588,7 +3546,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
}
unlock_out:
- unlock_cluster(ci);
+ swap_unlock_cluster(ci);
return err;
}
@@ -3688,7 +3646,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
offset = swp_offset(entry);
- ci = lock_cluster(si, offset);
+ ci = swap_lock_cluster(si, offset);
count = swap_count(si->swap_map[offset]);
@@ -3748,7 +3706,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
out_unlock_cont:
spin_unlock(&si->cont_lock);
out:
- unlock_cluster(ci);
+ swap_unlock_cluster(ci);
put_swap_device(si);
outer:
if (page)
--
2.49.0
next prev parent reply other threads:[~2025-05-14 20:18 UTC|newest]
Thread overview: 56+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-14 20:17 [PATCH 00/28] mm, swap: introduce swap table Kairui Song
2025-05-14 20:17 ` [PATCH 01/28] mm, swap: don't scan every fragment cluster Kairui Song
2025-05-14 20:17 ` [PATCH 02/28] mm, swap: consolidate the helper for mincore Kairui Song
2025-05-14 20:17 ` [PATCH 03/28] mm/shmem, swap: remove SWAP_MAP_SHMEM Kairui Song
2025-05-14 20:17 ` [PATCH 04/28] mm, swap: split readahead update out of swap cache lookup Kairui Song
2025-05-14 20:17 ` [PATCH 05/28] mm, swap: sanitize swap cache lookup convention Kairui Song
2025-05-19 4:38 ` Barry Song
2025-05-20 3:31 ` Kairui Song
2025-05-20 4:41 ` Barry Song
2025-05-20 19:09 ` Kairui Song
2025-05-20 22:33 ` Barry Song
2025-05-21 2:45 ` Kairui Song
2025-05-21 3:24 ` Barry Song
2025-05-23 2:29 ` Barry Song
2025-05-23 20:01 ` Kairui Song
2025-05-27 7:58 ` Barry Song
2025-05-27 15:11 ` Kairui Song
2025-05-30 8:49 ` Kairui Song
2025-05-30 19:24 ` Kairui Song
2025-05-14 20:17 ` Kairui Song [this message]
2025-05-19 6:26 ` [PATCH 06/28] mm, swap: rearrange swap cluster definition and helpers Barry Song
2025-05-20 3:50 ` Kairui Song
2025-05-14 20:17 ` [PATCH 07/28] mm, swap: tidy up swap device and cluster info helpers Kairui Song
2025-05-14 20:17 ` [PATCH 08/28] mm, swap: use swap table for the swap cache and switch API Kairui Song
2025-05-14 20:17 ` [PATCH 09/28] mm/swap: rename __read_swap_cache_async to __swapin_cache_alloc Kairui Song
2025-05-14 20:17 ` [PATCH 10/28] mm, swap: add a swap helper for bypassing only read ahead Kairui Song
2025-05-14 20:17 ` [PATCH 11/28] mm, swap: clean up and consolidate helper for mTHP swapin check Kairui Song
2025-05-15 9:31 ` Klara Modin
2025-05-15 9:39 ` Kairui Song
2025-05-19 7:08 ` Barry Song
2025-05-19 11:09 ` Kairui Song
2025-05-19 11:57 ` Barry Song
2025-05-14 20:17 ` [PATCH 12/28] mm, swap: never bypass the swap cache for SWP_SYNCHRONOUS_IO Kairui Song
2025-05-14 20:17 ` [PATCH 13/28] mm/shmem, swap: avoid redundant Xarray lookup during swapin Kairui Song
2025-05-14 20:17 ` [PATCH 14/28] mm/shmem: never bypass the swap cache for SWP_SYNCHRONOUS_IO Kairui Song
2025-05-14 20:17 ` [PATCH 15/28] mm, swap: split locked entry freeing into a standalone helper Kairui Song
2025-05-14 20:17 ` [PATCH 16/28] mm, swap: use swap cache as the swap in synchronize layer Kairui Song
2025-05-14 20:17 ` [PATCH 17/28] mm, swap: sanitize swap entry management workflow Kairui Song
2025-05-14 20:17 ` [PATCH 18/28] mm, swap: rename and introduce folio_free_swap_cache Kairui Song
2025-05-14 20:17 ` [PATCH 19/28] mm, swap: clean up and improve swap entries batch freeing Kairui Song
2025-05-14 20:17 ` [PATCH 20/28] mm, swap: check swap table directly for checking cache Kairui Song
2025-06-19 10:38 ` Baoquan He
2025-06-19 10:50 ` Kairui Song
2025-06-20 8:04 ` Baoquan He
2025-05-14 20:17 ` [PATCH 21/28] mm, swap: add folio to swap cache directly on allocation Kairui Song
2025-05-14 20:17 ` [PATCH 22/28] mm, swap: drop the SWAP_HAS_CACHE flag Kairui Song
2025-05-14 20:17 ` [PATCH 23/28] mm, swap: remove no longer needed _swap_info_get Kairui Song
2025-05-14 20:17 ` [PATCH 24/28] mm, swap: implement helpers for reserving data in swap table Kairui Song
2025-05-15 9:40 ` Klara Modin
2025-05-16 2:35 ` Kairui Song
2025-05-14 20:17 ` [PATCH 25/28] mm/workingset: leave highest 8 bits empty for anon shadow Kairui Song
2025-05-14 20:17 ` [PATCH 26/28] mm, swap: minor clean up for swapon Kairui Song
2025-05-14 20:17 ` [PATCH 27/28] mm, swap: use swap table to track swap count Kairui Song
2025-05-14 20:17 ` [PATCH 28/28] mm, swap: implement dynamic allocation of swap table Kairui Song
2025-05-21 18:36 ` Nhat Pham
2025-05-22 4:13 ` Kairui Song
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250514201729.48420-7-ryncsn@gmail.com \
--to=ryncsn@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=baohua@kernel.org \
--cc=baolin.wang@linux.alibaba.com \
--cc=bhe@redhat.com \
--cc=chrisl@kernel.org \
--cc=david@redhat.com \
--cc=hannes@cmpxchg.org \
--cc=hughd@google.com \
--cc=kaleshsingh@google.com \
--cc=kasong@tencent.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=nphamcs@gmail.com \
--cc=ryan.roberts@arm.com \
--cc=shikemeng@huaweicloud.com \
--cc=tim.c.chen@linux.intel.com \
--cc=willy@infradead.org \
--cc=ying.huang@linux.alibaba.com \
--cc=yosryahmed@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox