From: Kairui Song <ryncsn@gmail.com>
To: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
Kemeng Shi <shikemeng@huaweicloud.com>,
Nhat Pham <nphamcs@gmail.com>, Baoquan He <bhe@redhat.com>,
Barry Song <baohua@kernel.org>,
Johannes Weiner <hannes@cmpxchg.org>,
David Hildenbrand <david@kernel.org>,
Lorenzo Stoakes <lorenzo.stoakes@oracle.com>,
Youngjun Park <youngjun.park@lge.com>,
linux-kernel@vger.kernel.org, Chris Li <chrisl@kernel.org>,
Kairui Song <kasong@tencent.com>
Subject: [PATCH v2 07/12] mm, swap: mark bad slots in swap table directly
Date: Wed, 28 Jan 2026 17:28:31 +0800 [thread overview]
Message-ID: <20260128-swap-table-p3-v2-7-fe0b67ef0215@tencent.com> (raw)
In-Reply-To: <20260128-swap-table-p3-v2-0-fe0b67ef0215@tencent.com>
From: Kairui Song <kasong@tencent.com>
In preparation for deprecating swap_map, mark bad slots in the swap table
too when setting SWAP_MAP_BAD in swap_map. Also, refine the swap table
sanity check on freeing to adapt to the bad slots change. For swapoff,
the bad slots count must match the cluster usage count, as nothing
should touch them, and they contribute to the cluster usage count on
swapon. For ordinary swap table freeing, the swap table of clusters with
bad slots should never be freed since the cluster usage count never
reaches zero.
Signed-off-by: Kairui Song <kasong@tencent.com>
---
mm/swapfile.c | 56 +++++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 41 insertions(+), 15 deletions(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index df8b13eecab1..bdce2abd9135 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -454,16 +454,37 @@ static void swap_table_free(struct swap_table *table)
swap_table_free_folio_rcu_cb);
}
+/*
+ * Sanity check to ensure nothing leaked, and the specified range is empty.
+ * One special case is that bad slots can't be freed, so check the number of
+ * bad slots for swapoff, and non-swapoff path must never free bad slots.
+ */
+static void swap_cluster_assert_empty(struct swap_cluster_info *ci, bool swapoff)
+{
+ unsigned int ci_off = 0, ci_end = SWAPFILE_CLUSTER;
+ unsigned long swp_tb;
+ int bad_slots = 0;
+
+ if (!IS_ENABLED(CONFIG_DEBUG_VM) && !swapoff)
+ return;
+
+ do {
+ swp_tb = __swap_table_get(ci, ci_off);
+ if (swp_tb_is_bad(swp_tb))
+ bad_slots++;
+ else
+ WARN_ON_ONCE(!swp_tb_is_null(swp_tb));
+ } while (++ci_off < ci_end);
+
+ WARN_ON_ONCE(bad_slots != (swapoff ? ci->count : 0));
+}
+
static void swap_cluster_free_table(struct swap_cluster_info *ci)
{
- unsigned int ci_off;
struct swap_table *table;
/* Only empty cluster's table is allow to be freed */
lockdep_assert_held(&ci->lock);
- VM_WARN_ON_ONCE(!cluster_is_empty(ci));
- for (ci_off = 0; ci_off < SWAPFILE_CLUSTER; ci_off++)
- VM_WARN_ON_ONCE(!swp_tb_is_null(__swap_table_get(ci, ci_off)));
table = (void *)rcu_dereference_protected(ci->table, true);
rcu_assign_pointer(ci->table, NULL);
@@ -567,6 +588,7 @@ static void swap_cluster_schedule_discard(struct swap_info_struct *si,
static void __free_cluster(struct swap_info_struct *si, struct swap_cluster_info *ci)
{
+ swap_cluster_assert_empty(ci, false);
swap_cluster_free_table(ci);
move_cluster(si, ci, &si->free_clusters, CLUSTER_FLAG_FREE);
ci->order = 0;
@@ -747,9 +769,11 @@ static int swap_cluster_setup_bad_slot(struct swap_info_struct *si,
struct swap_cluster_info *cluster_info,
unsigned int offset, bool mask)
{
+ unsigned int ci_off = offset % SWAPFILE_CLUSTER;
unsigned long idx = offset / SWAPFILE_CLUSTER;
- struct swap_table *table;
struct swap_cluster_info *ci;
+ struct swap_table *table;
+ int ret = 0;
/* si->max may got shrunk by swap swap_activate() */
if (offset >= si->max && !mask) {
@@ -767,13 +791,7 @@ static int swap_cluster_setup_bad_slot(struct swap_info_struct *si,
pr_warn("Empty swap-file\n");
return -EINVAL;
}
- /* Check for duplicated bad swap slots. */
- if (si->swap_map[offset]) {
- pr_warn("Duplicated bad slot offset %d\n", offset);
- return -EINVAL;
- }
- si->swap_map[offset] = SWAP_MAP_BAD;
ci = cluster_info + idx;
if (!ci->table) {
table = swap_table_alloc(GFP_KERNEL);
@@ -781,13 +799,21 @@ static int swap_cluster_setup_bad_slot(struct swap_info_struct *si,
return -ENOMEM;
rcu_assign_pointer(ci->table, table);
}
-
- ci->count++;
+ spin_lock(&ci->lock);
+ /* Check for duplicated bad swap slots. */
+ if (__swap_table_xchg(ci, ci_off, SWP_TB_BAD) != SWP_TB_NULL) {
+ pr_warn("Duplicated bad slot offset %d\n", offset);
+ ret = -EINVAL;
+ } else {
+ si->swap_map[offset] = SWAP_MAP_BAD;
+ ci->count++;
+ }
+ spin_unlock(&ci->lock);
WARN_ON(ci->count > SWAPFILE_CLUSTER);
WARN_ON(ci->flags);
- return 0;
+ return ret;
}
/*
@@ -2743,7 +2769,7 @@ static void free_swap_cluster_info(struct swap_cluster_info *cluster_info,
/* Cluster with bad marks count will have a remaining table */
spin_lock(&ci->lock);
if (rcu_dereference_protected(ci->table, true)) {
- ci->count = 0;
+ swap_cluster_assert_empty(ci, true);
swap_cluster_free_table(ci);
}
spin_unlock(&ci->lock);
--
2.52.0
next prev parent reply other threads:[~2026-01-28 9:31 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-28 9:28 [PATCH v2 00/12] mm, swap: swap table phase III: remove swap_map Kairui Song
2026-01-28 9:28 ` [PATCH v2 01/12] mm, swap: protect si->swap_file properly and use as a mount indicator Kairui Song
2026-01-28 9:28 ` [PATCH v2 02/12] mm, swap: clean up swapon process and locking Kairui Song
2026-01-29 8:35 ` YoungJun Park
2026-02-02 2:31 ` Kairui Song
2026-01-28 9:28 ` [PATCH v2 03/12] mm, swap: remove redundant arguments and locking for enabling a device Kairui Song
2026-01-28 9:28 ` [PATCH v2 04/12] mm, swap: consolidate bad slots setup and make it more robust Kairui Song
2026-01-28 9:28 ` [PATCH v2 05/12] mm/workingset: leave highest bits empty for anon shadow Kairui Song
2026-01-28 9:28 ` [PATCH v2 06/12] mm, swap: implement helpers for reserving data in the swap table Kairui Song
2026-01-29 7:28 ` YoungJun Park
2026-02-02 2:30 ` Kairui Song
2026-01-28 9:28 ` Kairui Song [this message]
2026-01-28 9:28 ` [PATCH v2 08/12] mm, swap: simplify swap table sanity range check Kairui Song
2026-01-28 9:28 ` [PATCH v2 09/12] mm, swap: use the swap table to track the swap count Kairui Song
2026-01-29 7:05 ` YoungJun Park
2026-01-29 8:28 ` YoungJun Park
2026-02-02 3:27 ` Kairui Song
2026-01-28 9:28 ` [PATCH v2 10/12] mm, swap: no need to truncate the scan border Kairui Song
2026-01-28 9:28 ` [PATCH v2 11/12] mm, swap: simplify checking if a folio is swapped Kairui Song
2026-01-28 9:28 ` [PATCH v2 12/12] mm, swap: no need to clear the shadow explicitly Kairui Song
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260128-swap-table-p3-v2-7-fe0b67ef0215@tencent.com \
--to=ryncsn@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=baohua@kernel.org \
--cc=bhe@redhat.com \
--cc=chrisl@kernel.org \
--cc=david@kernel.org \
--cc=hannes@cmpxchg.org \
--cc=kasong@tencent.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=lorenzo.stoakes@oracle.com \
--cc=nphamcs@gmail.com \
--cc=shikemeng@huaweicloud.com \
--cc=youngjun.park@lge.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox